Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RHistStats.hxx
Go to the documentation of this file.
1/// \file
2/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
3/// Feedback is welcome!
4
5#ifndef ROOT_RHistStats
6#define ROOT_RHistStats
7
8#include "RHistUtils.hxx"
10#include "RWeight.hxx"
11
12#include <cmath>
13#include <cstdint>
14#include <stdexcept>
15#include <tuple>
16#include <vector>
17
18class TBuffer;
19
20namespace ROOT {
21namespace Experimental {
22
23/**
24Histogram statistics of unbinned values.
25
26Every call to \ref Fill(const A &... args) "Fill" updates sums to compute the number of effective entries as well as the
27arithmetic mean and other statistical quantities per dimension:
28\code
29ROOT::Experimental::RHistStats stats(1);
30stats.Fill(8.5);
31stats.Fill(1.5);
32// stats.ComputeMean() will return 5.0
33\endcode
34
35\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
36Feedback is welcome!
37*/
39public:
40 /// Statistics for one dimension.
42 double fSumWX = 0.0;
43 double fSumWX2 = 0.0;
44 double fSumWX3 = 0.0;
45 double fSumWX4 = 0.0;
46
47 void Add(double x)
48 {
49 fSumWX += x;
50 fSumWX2 += x * x;
51 fSumWX3 += x * x * x;
52 fSumWX4 += x * x * x * x;
53 }
54
55 void Add(double x, double w)
56 {
57 fSumWX += w * x;
58 fSumWX2 += w * x * x;
59 fSumWX3 += w * x * x * x;
60 fSumWX4 += w * x * x * x * x;
61 }
62
64 {
65 fSumWX += other.fSumWX;
66 fSumWX2 += other.fSumWX2;
67 fSumWX3 += other.fSumWX3;
68 fSumWX4 += other.fSumWX4;
69 }
70
71 void Clear()
72 {
73 fSumWX = 0.0;
74 fSumWX2 = 0.0;
75 fSumWX3 = 0.0;
76 fSumWX4 = 0.0;
77 }
78
79 void Scale(double factor)
80 {
81 fSumWX *= factor;
82 fSumWX2 *= factor;
83 fSumWX3 *= factor;
84 fSumWX4 *= factor;
85 }
86 };
87
88private:
89 /// The number of entries
90 std::uint64_t fNEntries = 0;
91 /// The sum of weights
92 double fSumW = 0.0;
93 /// The sum of weights squared
94 double fSumW2 = 0.0;
95 /// The sums per dimension
96 std::vector<RDimensionStats> fDimensionStats;
97
98public:
99 /// Construct a statistics object.
100 ///
101 /// \param[in] nDimensions the number of dimensions, must be > 0
102 explicit RHistStats(std::size_t nDimensions)
103 {
104 if (nDimensions == 0) {
105 throw std::invalid_argument("nDimensions must be > 0");
106 }
108 }
109
110 std::size_t GetNDimensions() const { return fDimensionStats.size(); }
111
112 std::uint64_t GetNEntries() const { return fNEntries; }
113 double GetSumW() const { return fSumW; }
114 double GetSumW2() const { return fSumW2; }
115
116 const RDimensionStats &GetDimensionStats(std::size_t dim = 0) const { return fDimensionStats.at(dim); }
117
118 /// Add all entries from another statistics object.
119 ///
120 /// Throws an exception if the number of dimensions are not identical.
121 ///
122 /// \param[in] other another statistics object
123 void Add(const RHistStats &other)
124 {
125 if (fDimensionStats.size() != other.fDimensionStats.size()) {
126 throw std::invalid_argument("number of dimensions not identical in Add");
127 }
128 fNEntries += other.fNEntries;
129 fSumW += other.fSumW;
130 fSumW2 += other.fSumW2;
131 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
132 fDimensionStats[i].Add(other.fDimensionStats[i]);
133 }
134 }
135
136 /// Clear this statistics object.
137 void Clear()
138 {
139 fNEntries = 0;
140 fSumW = 0;
141 fSumW2 = 0;
142 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
143 fDimensionStats[i].Clear();
144 }
145 }
146
147 /// Compute the number of effective entries.
148 ///
149 /// \f[
150 /// \frac{(\sum w_i)^2}{\sum w_i^2}
151 /// \f]
152 ///
153 /// \return the number of effective entries
155 {
156 if (fSumW2 == 0) {
157 return 0;
158 }
159 return fSumW * fSumW / fSumW2;
160 }
161
162 /// Compute the arithmetic mean of unbinned values.
163 ///
164 /// \f[
165 /// \mu = \frac{\sum w_i \cdot x_i}{\sum w_i}
166 /// \f]
167 ///
168 /// \param[in] dim the dimension index, starting at 0
169 /// \return the arithmetic mean of unbinned values
170 double ComputeMean(std::size_t dim = 0) const
171 {
172 // First get the statistics, which includes checking the argument.
173 auto &stats = fDimensionStats.at(dim);
174 if (fSumW == 0) {
175 return 0;
176 }
177 return stats.fSumWX / fSumW;
178 }
179
180 /// Compute the variance of unbinned values.
181 ///
182 /// This function computes the uncorrected sample variance:
183 /// \f[
184 /// \sigma^2 = \frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2
185 /// \f]
186 /// With some rewriting, this is equivalent to:
187 /// \f[
188 /// \sigma^2 = \frac{\sum w_i \cdot x_i^2}{\sum w_i} - \mu^2
189 /// \f]
190 ///
191 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
192 /// In other words, the return value is a biased estimation lower than the actual population variance.
193 ///
194 /// \param[in] dim the dimension index, starting at 0
195 /// \return the variance of unbinned values
196 double ComputeVariance(std::size_t dim = 0) const
197 {
198 // First get the statistics, which includes checking the argument.
199 auto &stats = fDimensionStats.at(dim);
200 if (fSumW == 0) {
201 return 0;
202 }
203 double mean = ComputeMean(dim);
204 return stats.fSumWX2 / fSumW - mean * mean;
205 }
206
207 /// Compute the standard deviation of unbinned values.
208 ///
209 /// This function computes the uncorrected sample standard deviation:
210 /// \f[
211 /// \sigma = \sqrt{\frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2}
212 /// \f]
213 /// With some rewriting, this is equivalent to:
214 /// \f[
215 /// \sigma = \sqrt{\frac{\sum w_i \cdot x_i^2}{\sum w_i} - \frac{(\sum w_i \cdot x_i)^2}{(\sum w_i)^2}}
216 /// \f]
217 ///
218 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
219 /// In other words, the return value is a biased estimation lower than the actual population standard deviation.
220 ///
221 /// \param[in] dim the dimension index, starting at 0
222 /// \return the standard deviation of unbinned values
223 double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
224
225 // clang-format off
226 /// Compute the skewness of unbinned values.
227 ///
228 /// The skewness is the third standardized moment:
229 /// \f[
230 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
231 /// \f]
232 /// With support for weighted filling and after some rewriting, it is computed as:
233 /// \f[
234 /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
235 /// \f]
236 ///
237 /// \param[in] dim the dimension index, starting at 0
238 /// \return the skewness of unbinned values
239 // clang-format on
240 double ComputeSkewness(std::size_t dim = 0) const
241 {
242 // First get the statistics, which includes checking the argument.
243 auto &stats = fDimensionStats.at(dim);
244 if (fSumW == 0) {
245 return 0;
246 }
247 double mean = ComputeMean(dim);
248 double var = ComputeVariance(dim);
249 if (var == 0) {
250 return 0;
251 }
252 double EWX3 = stats.fSumWX3 / fSumW;
253 double EWX2 = stats.fSumWX2 / fSumW;
254 return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5);
255 }
256
257 // clang-format off
258 /// Compute the (excess) kurtosis of unbinned values.
259 ///
260 /// The kurtosis is based on the fourth standardized moment:
261 /// \f[
262 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
263 /// \f]
264 /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
265 /// \f[
266 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
267 /// \f]
268 ///
269 /// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as:
270 /// \f[
271 /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
272 /// \f]
273 ///
274 /// \param[in] dim the dimension index, starting at 0
275 /// \return the (excess) kurtosis of unbinned values
276 // clang-format on
277 double ComputeKurtosis(std::size_t dim = 0) const
278 {
279 // First get the statistics, which includes checking the argument.
280 auto &stats = fDimensionStats.at(dim);
281 if (fSumW == 0) {
282 return 0;
283 }
284 double mean = ComputeMean(dim);
285 double var = ComputeVariance(dim);
286 if (var == 0) {
287 return 0;
288 }
289 double EWX4 = stats.fSumWX4 / fSumW;
290 double EWX3 = stats.fSumWX3 / fSumW;
291 double EWX2 = stats.fSumWX2 / fSumW;
292 return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3;
293 }
294
295private:
296 template <std::size_t I, typename... A>
297 void FillImpl(const std::tuple<A...> &args)
298 {
299 fDimensionStats[I].Add(std::get<I>(args));
300 if constexpr (I + 1 < sizeof...(A)) {
301 FillImpl<I + 1>(args);
302 }
303 }
304
305 template <std::size_t I, std::size_t N, typename... A>
306 void FillImpl(const std::tuple<A...> &args, double w)
307 {
308 fDimensionStats[I].Add(std::get<I>(args), w);
309 if constexpr (I + 1 < N) {
310 FillImpl<I + 1, N>(args, w);
311 }
312 }
313
314public:
315 /// Fill an entry into this statistics object.
316 ///
317 /// \code
318 /// ROOT::Experimental::RHistStats stats(2);
319 /// auto args = std::make_tuple(8.5, 10.5);
320 /// stats.Fill(args);
321 /// \endcode
322 ///
323 /// Throws an exception if the number of arguments does not match the number of dimensions.
324 ///
325 /// \param[in] args the arguments for each dimension
326 /// \par See also
327 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
328 /// \ref Fill(const std::tuple<A...> &args, RWeight weight) "overload for weighted filling"
329 template <typename... A>
330 void Fill(const std::tuple<A...> &args)
331 {
332 if (sizeof...(A) != fDimensionStats.size()) {
333 throw std::invalid_argument("invalid number of arguments to Fill");
334 }
335 fNEntries++;
336 fSumW++;
337 fSumW2++;
338 FillImpl<0>(args);
339 }
340
341 /// Fill an entry into this statistics object with a weight.
342 ///
343 /// \code
344 /// ROOT::Experimental::RHistStats stats(2);
345 /// auto args = std::make_tuple(8.5, 10.5);
346 /// stats.Fill(args, ROOT::Experimental::RWeight(0.8));
347 /// \endcode
348 ///
349 /// \param[in] args the arguments for each dimension
350 /// \param[in] weight the weight for this entry
351 /// \par See also
352 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
353 /// \ref Fill(const std::tuple<A...> &args) "overload for unweighted filling"
354 template <typename... A>
355 void Fill(const std::tuple<A...> &args, RWeight weight)
356 {
357 if (sizeof...(A) != fDimensionStats.size()) {
358 throw std::invalid_argument("invalid number of arguments to Fill");
359 }
360 fNEntries++;
361 double w = weight.fValue;
362 fSumW += w;
363 fSumW2 += w * w;
364 FillImpl<0, sizeof...(A)>(args, w);
365 }
366
367 /// Fill an entry into this statistics object.
368 ///
369 /// \code
370 /// ROOT::Experimental::RHistStats stats(2);
371 /// stats.Fill(8.5, 10.5);
372 /// \endcode
373 /// For weighted filling, pass an RWeight as the last argument:
374 /// \code
375 /// ROOT::Experimental::RHistStats stats(2);
376 /// stats.Fill(8.5, 10.5, ROOT::Experimental::RWeight(0.8));
377 /// \endcode
378 ///
379 /// Throws an exception if the number of arguments does not match the number of dimensions.
380 ///
381 /// \param[in] args the arguments for each dimension
382 /// \par See also
383 /// the function overloads accepting `std::tuple` \ref Fill(const std::tuple<A...> &args) "for unweighted filling"
384 /// and \ref Fill(const std::tuple<A...> &args, RWeight) "for weighted filling"
385 template <typename... A>
386 void Fill(const A &...args)
387 {
388 auto t = std::forward_as_tuple(args...);
389 if constexpr (std::is_same_v<typename Internal::LastType<A...>::type, RWeight>) {
390 static constexpr std::size_t N = sizeof...(A) - 1;
391 if (N != fDimensionStats.size()) {
392 throw std::invalid_argument("invalid number of arguments to Fill");
393 }
394 fNEntries++;
395 double w = std::get<N>(t).fValue;
396 fSumW += w;
397 fSumW2 += w * w;
398 FillImpl<0, N>(t, w);
399 } else {
400 Fill(t);
401 }
402 }
403
404 /// Scale the histogram statistics.
405 ///
406 /// \param[in] factor the scale factor
407 void Scale(double factor)
408 {
409 fSumW *= factor;
410 fSumW2 *= factor * factor;
411 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
412 fDimensionStats[i].Scale(factor);
413 }
414 }
415
416 /// %ROOT Streamer function to throw when trying to store an object of this class.
417 void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistStats"); }
418};
419
420} // namespace Experimental
421} // namespace ROOT
422
423#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Histogram statistics of unbinned values.
void Add(const RHistStats &other)
Add all entries from another statistics object.
void Fill(const std::tuple< A... > &args, RWeight weight)
Fill an entry into this statistics object with a weight.
void FillImpl(const std::tuple< A... > &args)
double ComputeSkewness(std::size_t dim=0) const
Compute the skewness of unbinned values.
void Fill(const A &...args)
Fill an entry into this statistics object.
std::vector< RDimensionStats > fDimensionStats
The sums per dimension.
void Clear()
Clear this statistics object.
void FillImpl(const std::tuple< A... > &args, double w)
double fSumW
The sum of weights.
std::size_t GetNDimensions() const
const RDimensionStats & GetDimensionStats(std::size_t dim=0) const
double fSumW2
The sum of weights squared.
void Scale(double factor)
Scale the histogram statistics.
void Fill(const std::tuple< A... > &args)
Fill an entry into this statistics object.
double ComputeNEffectiveEntries() const
Compute the number of effective entries.
double ComputeVariance(std::size_t dim=0) const
Compute the variance of unbinned values.
void Streamer(TBuffer &)
ROOT Streamer function to throw when trying to store an object of this class.
std::uint64_t GetNEntries() const
double ComputeMean(std::size_t dim=0) const
Compute the arithmetic mean of unbinned values.
double ComputeKurtosis(std::size_t dim=0) const
Compute the (excess) kurtosis of unbinned values.
RHistStats(std::size_t nDimensions)
Construct a statistics object.
std::uint64_t fNEntries
The number of entries.
double ComputeStdDev(std::size_t dim=0) const
Compute the standard deviation of unbinned values.
Buffer base class used for serializing objects.
Definition TBuffer.h:43
Double_t x[n]
Definition legend1.C:17
#define I(x, y, z)
void Add(const RDimensionStats &other)
A weight for filling histograms.
Definition RWeight.hxx:17