Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RHistStats.hxx
Go to the documentation of this file.
1/// \file
2/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
3/// Feedback is welcome!
4
5#ifndef ROOT_RHistStats
6#define ROOT_RHistStats
7
8#include "RHistUtils.hxx"
9#include "RWeight.hxx"
10
11#include <cassert>
12#include <cmath>
13#include <cstddef>
14#include <cstdint>
15#include <limits>
16#include <stdexcept>
17#include <tuple>
18#include <type_traits>
19#include <vector>
20
21class TBuffer;
22
23namespace ROOT {
24namespace Experimental {
25
26/**
27Histogram statistics of unbinned values.
28
29Every call to \ref Fill(const A &... args) "Fill" updates sums to compute the number of effective entries as well as the
30arithmetic mean and other statistical quantities per dimension:
31\code
32ROOT::Experimental::RHistStats stats(1);
33stats.Fill(8.5);
34stats.Fill(1.5);
35// stats.ComputeMean() will return 5.0
36\endcode
37
38\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
39Feedback is welcome!
40*/
42public:
43 /// Statistics for one dimension.
45 bool fEnabled = true;
46 double fSumWX = 0.0;
47 double fSumWX2 = 0.0;
48 double fSumWX3 = 0.0;
49 double fSumWX4 = 0.0;
50
51 void Add(double x)
52 {
54 fSumWX += x;
55 fSumWX2 += x * x;
56 fSumWX3 += x * x * x;
57 fSumWX4 += x * x * x * x;
58 }
59
60 void Add(double x, double w)
61 {
63 fSumWX += w * x;
64 fSumWX2 += w * x * x;
65 fSumWX3 += w * x * x * x;
66 fSumWX4 += w * x * x * x * x;
67 }
68
70 {
72 fSumWX += other.fSumWX;
73 fSumWX2 += other.fSumWX2;
74 fSumWX3 += other.fSumWX3;
75 fSumWX4 += other.fSumWX4;
76 }
77
78 /// Add another statistics object using atomic instructions.
79 ///
80 /// \param[in] other another statistics object that must not be modified during the operation
89
90 void Clear()
91 {
92 fSumWX = 0.0;
93 fSumWX2 = 0.0;
94 fSumWX3 = 0.0;
95 fSumWX4 = 0.0;
96 }
97
98 void Scale(double factor)
99 {
101 fSumWX *= factor;
102 fSumWX2 *= factor;
103 fSumWX3 *= factor;
104 fSumWX4 *= factor;
105 }
106 };
107
108private:
109 /// The number of entries
110 std::uint64_t fNEntries = 0;
111 /// The sum of weights
112 double fSumW = 0.0;
113 /// The sum of weights squared
114 double fSumW2 = 0.0;
115 /// The sums per dimension
116 std::vector<RDimensionStats> fDimensionStats;
117
118public:
119 /// Construct a statistics object.
120 ///
121 /// \param[in] nDimensions the number of dimensions, must be > 0
122 explicit RHistStats(std::size_t nDimensions)
123 {
124 if (nDimensions == 0) {
125 throw std::invalid_argument("nDimensions must be > 0");
126 }
128 }
129
130 std::size_t GetNDimensions() const { return fDimensionStats.size(); }
131
132 std::uint64_t GetNEntries() const { return fNEntries; }
133 double GetSumW() const { return fSumW; }
134 double GetSumW2() const { return fSumW2; }
135
136 /// Get the statistics object for one dimension.
137 ///
138 /// Throws an exception if the dimension is disabled.
139 ///
140 /// \param[in] dim the dimension index, starting at 0
141 /// \return the statistics object
142 const RDimensionStats &GetDimensionStats(std::size_t dim = 0) const
143 {
144 const RDimensionStats &stats = fDimensionStats.at(dim);
145 if (!stats.fEnabled) {
146 throw std::invalid_argument("dimension is disabled");
147 }
148 return stats;
149 }
150
151 /// Disable one dimension of this statistics object.
152 ///
153 /// All future calls to Fill will ignore the corresponding argument. Once disabled, a dimension cannot be reenabled.
154 ///
155 /// \param[in] dim the dimension index, starting at 0
156 void DisableDimension(std::size_t dim) { fDimensionStats.at(dim).fEnabled = false; }
157
158 bool IsEnabled(std::size_t dim) const { return fDimensionStats.at(dim).fEnabled; }
159
160 /// Add all entries from another statistics object.
161 ///
162 /// Throws an exception if the number of dimensions are not identical.
163 ///
164 /// \param[in] other another statistics object
165 void Add(const RHistStats &other)
166 {
167 if (fDimensionStats.size() != other.fDimensionStats.size()) {
168 throw std::invalid_argument("number of dimensions not identical in Add");
169 }
170 fNEntries += other.fNEntries;
171 fSumW += other.fSumW;
172 fSumW2 += other.fSumW2;
173 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
174 if (fDimensionStats[i].fEnabled != other.fDimensionStats[i].fEnabled) {
175 throw std::invalid_argument("the same dimensions must be enabled to combine statistics with Add");
176 }
177 if (fDimensionStats[i].fEnabled) {
178 fDimensionStats[i].Add(other.fDimensionStats[i]);
179 }
180 }
181 }
182
183 /// Add all entries from another statistics object using atomic instructions.
184 ///
185 /// Throws an exception if the number of dimensions are not identical.
186 ///
187 /// \param[in] other another statistics object that must not be modified during the operation
189 {
190 if (fDimensionStats.size() != other.fDimensionStats.size()) {
191 throw std::invalid_argument("number of dimensions not identical in Add");
192 }
196 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
197 if (fDimensionStats[i].fEnabled != other.fDimensionStats[i].fEnabled) {
198 throw std::invalid_argument("the same dimensions must be enabled to combine statistics with Add");
199 }
200 if (fDimensionStats[i].fEnabled) {
201 fDimensionStats[i].AddAtomic(other.fDimensionStats[i]);
202 }
203 }
204 }
205
206 /// Clear this statistics object.
207 void Clear()
208 {
209 fNEntries = 0;
210 fSumW = 0;
211 fSumW2 = 0;
212 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
213 fDimensionStats[i].Clear();
214 }
215 }
216
217 /// Compute the number of effective entries.
218 ///
219 /// \f[
220 /// \frac{(\sum w_i)^2}{\sum w_i^2}
221 /// \f]
222 ///
223 /// \return the number of effective entries
225 {
226 if (fSumW2 == 0) {
227 return std::numeric_limits<double>::signaling_NaN();
228 }
229 return fSumW * fSumW / fSumW2;
230 }
231
232 /// Compute the arithmetic mean of unbinned values.
233 ///
234 /// \f[
235 /// \mu = \frac{\sum w_i \cdot x_i}{\sum w_i}
236 /// \f]
237 ///
238 /// \param[in] dim the dimension index, starting at 0
239 /// \return the arithmetic mean of unbinned values
240 double ComputeMean(std::size_t dim = 0) const
241 {
242 // First get the statistics, which includes checking the argument.
243 auto &stats = fDimensionStats.at(dim);
244 if (fSumW == 0) {
245 return std::numeric_limits<double>::signaling_NaN();
246 }
247 return stats.fSumWX / fSumW;
248 }
249
250 /// Compute the variance of unbinned values.
251 ///
252 /// This function computes the uncorrected sample variance:
253 /// \f[
254 /// \sigma^2 = \frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2
255 /// \f]
256 /// With some rewriting, this is equivalent to:
257 /// \f[
258 /// \sigma^2 = \frac{\sum w_i \cdot x_i^2}{\sum w_i} - \mu^2
259 /// \f]
260 ///
261 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
262 /// In other words, the return value is a biased estimation lower than the actual population variance.
263 ///
264 /// \param[in] dim the dimension index, starting at 0
265 /// \return the variance of unbinned values
266 double ComputeVariance(std::size_t dim = 0) const
267 {
268 // First get the statistics, which includes checking the argument.
269 auto &stats = fDimensionStats.at(dim);
270 if (fSumW == 0) {
271 return std::numeric_limits<double>::signaling_NaN();
272 }
273 double mean = ComputeMean(dim);
274 return stats.fSumWX2 / fSumW - mean * mean;
275 }
276
277 /// Compute the standard deviation of unbinned values.
278 ///
279 /// This function computes the uncorrected sample standard deviation:
280 /// \f[
281 /// \sigma = \sqrt{\frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2}
282 /// \f]
283 /// With some rewriting, this is equivalent to:
284 /// \f[
285 /// \sigma = \sqrt{\frac{\sum w_i \cdot x_i^2}{\sum w_i} - \frac{(\sum w_i \cdot x_i)^2}{(\sum w_i)^2}}
286 /// \f]
287 ///
288 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
289 /// In other words, the return value is a biased estimation lower than the actual population standard deviation.
290 ///
291 /// \param[in] dim the dimension index, starting at 0
292 /// \return the standard deviation of unbinned values
293 double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
294
295 // clang-format off
296 /// Compute the skewness of unbinned values.
297 ///
298 /// The skewness is the third standardized moment:
299 /// \f[
300 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
301 /// \f]
302 /// With support for weighted filling and after some rewriting, it is computed as:
303 /// \f[
304 /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
305 /// \f]
306 ///
307 /// \param[in] dim the dimension index, starting at 0
308 /// \return the skewness of unbinned values
309 // clang-format on
310 double ComputeSkewness(std::size_t dim = 0) const
311 {
312 // First get the statistics, which includes checking the argument.
313 auto &stats = fDimensionStats.at(dim);
314 if (fSumW == 0) {
315 return std::numeric_limits<double>::signaling_NaN();
316 }
317 double mean = ComputeMean(dim);
318 double var = ComputeVariance(dim);
319 if (var == 0) {
320 return std::numeric_limits<double>::signaling_NaN();
321 }
322 double EWX3 = stats.fSumWX3 / fSumW;
323 double EWX2 = stats.fSumWX2 / fSumW;
324 return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5);
325 }
326
327 // clang-format off
328 /// Compute the (excess) kurtosis of unbinned values.
329 ///
330 /// The kurtosis is based on the fourth standardized moment:
331 /// \f[
332 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
333 /// \f]
334 /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
335 /// \f[
336 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
337 /// \f]
338 ///
339 /// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as:
340 /// \f[
341 /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
342 /// \f]
343 ///
344 /// \param[in] dim the dimension index, starting at 0
345 /// \return the (excess) kurtosis of unbinned values
346 // clang-format on
347 double ComputeKurtosis(std::size_t dim = 0) const
348 {
349 // First get the statistics, which includes checking the argument.
350 auto &stats = fDimensionStats.at(dim);
351 if (fSumW == 0) {
352 return std::numeric_limits<double>::signaling_NaN();
353 }
354 double mean = ComputeMean(dim);
355 double var = ComputeVariance(dim);
356 if (var == 0) {
357 return std::numeric_limits<double>::signaling_NaN();
358 }
359 double EWX4 = stats.fSumWX4 / fSumW;
360 double EWX3 = stats.fSumWX3 / fSumW;
361 double EWX2 = stats.fSumWX2 / fSumW;
362 return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3;
363 }
364
365private:
366 template <std::size_t I, typename... A>
367 void FillImpl(const std::tuple<A...> &args)
368 {
369 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
370 if (fDimensionStats[I].fEnabled) {
371 if constexpr (std::is_convertible_v<ArgumentType, double>) {
372 fDimensionStats[I].Add(std::get<I>(args));
373 } else {
374 throw std::invalid_argument("invalid type of argument in RHistStats");
375 }
376 }
377 if constexpr (I + 1 < sizeof...(A)) {
378 FillImpl<I + 1>(args);
379 }
380 }
381
382 template <std::size_t I, std::size_t N, typename... A>
383 void FillImpl(const std::tuple<A...> &args, double w)
384 {
385 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
386 if (fDimensionStats[I].fEnabled) {
387 if constexpr (std::is_convertible_v<ArgumentType, double>) {
388 fDimensionStats[I].Add(std::get<I>(args), w);
389 } else {
390 // Avoid compiler warning about unused parameter...
391 (void)w;
392 throw std::invalid_argument("invalid type of argument in RHistStats");
393 }
394 }
395 if constexpr (I + 1 < N) {
396 FillImpl<I + 1, N>(args, w);
397 }
398 }
399
400public:
401 /// Fill an entry into this statistics object.
402 ///
403 /// \code
404 /// ROOT::Experimental::RHistStats stats(2);
405 /// auto args = std::make_tuple(8.5, 10.5);
406 /// stats.Fill(args);
407 /// \endcode
408 ///
409 /// Throws an exception if the number of arguments does not match the number of dimensions.
410 ///
411 /// \param[in] args the arguments for each dimension
412 /// \par See also
413 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
414 /// \ref Fill(const std::tuple<A...> &args, RWeight weight) "overload for weighted filling"
415 template <typename... A>
416 void Fill(const std::tuple<A...> &args)
417 {
418 if (sizeof...(A) != fDimensionStats.size()) {
419 throw std::invalid_argument("invalid number of arguments to Fill");
420 }
421 fNEntries++;
422 fSumW++;
423 fSumW2++;
424 FillImpl<0>(args);
425 }
426
427 /// Fill an entry into this statistics object with a weight.
428 ///
429 /// \code
430 /// ROOT::Experimental::RHistStats stats(2);
431 /// auto args = std::make_tuple(8.5, 10.5);
432 /// stats.Fill(args, ROOT::Experimental::RWeight(0.8));
433 /// \endcode
434 ///
435 /// \param[in] args the arguments for each dimension
436 /// \param[in] weight the weight for this entry
437 /// \par See also
438 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
439 /// \ref Fill(const std::tuple<A...> &args) "overload for unweighted filling"
440 template <typename... A>
441 void Fill(const std::tuple<A...> &args, RWeight weight)
442 {
443 if (sizeof...(A) != fDimensionStats.size()) {
444 throw std::invalid_argument("invalid number of arguments to Fill");
445 }
446 fNEntries++;
447 double w = weight.fValue;
448 fSumW += w;
449 fSumW2 += w * w;
450 FillImpl<0, sizeof...(A)>(args, w);
451 }
452
453 /// Fill an entry into this statistics object.
454 ///
455 /// \code
456 /// ROOT::Experimental::RHistStats stats(2);
457 /// stats.Fill(8.5, 10.5);
458 /// \endcode
459 /// For weighted filling, pass an RWeight as the last argument:
460 /// \code
461 /// ROOT::Experimental::RHistStats stats(2);
462 /// stats.Fill(8.5, 10.5, ROOT::Experimental::RWeight(0.8));
463 /// \endcode
464 ///
465 /// Throws an exception if the number of arguments does not match the number of dimensions.
466 ///
467 /// \param[in] args the arguments for each dimension
468 /// \par See also
469 /// the function overloads accepting `std::tuple` \ref Fill(const std::tuple<A...> &args) "for unweighted filling"
470 /// and \ref Fill(const std::tuple<A...> &args, RWeight) "for weighted filling"
471 template <typename... A>
472 void Fill(const A &...args)
473 {
474 static_assert(sizeof...(A) >= 1, "need at least one argument to Fill");
475 if constexpr (sizeof...(A) >= 1) {
476 auto t = std::forward_as_tuple(args...);
477 if constexpr (std::is_same_v<typename Internal::LastType<A...>::type, RWeight>) {
478 static constexpr std::size_t N = sizeof...(A) - 1;
479 if (N != fDimensionStats.size()) {
480 throw std::invalid_argument("invalid number of arguments to Fill");
481 }
482 fNEntries++;
483 double w = std::get<N>(t).fValue;
484 fSumW += w;
485 fSumW2 += w * w;
486 FillImpl<0, N>(t, w);
487 } else {
488 Fill(t);
489 }
490 }
491 }
492
493 /// Scale the histogram statistics.
494 ///
495 /// \param[in] factor the scale factor
496 void Scale(double factor)
497 {
498 fSumW *= factor;
499 fSumW2 *= factor * factor;
500 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
501 if (fDimensionStats[i].fEnabled) {
502 fDimensionStats[i].Scale(factor);
503 }
504 }
505 }
506
507 /// %ROOT Streamer function to throw when trying to store an object of this class.
508 void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistStats"); }
509};
510
511} // namespace Experimental
512} // namespace ROOT
513
514#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Histogram statistics of unbinned values.
void Add(const RHistStats &other)
Add all entries from another statistics object.
void Fill(const std::tuple< A... > &args, RWeight weight)
Fill an entry into this statistics object with a weight.
void FillImpl(const std::tuple< A... > &args)
double ComputeSkewness(std::size_t dim=0) const
Compute the skewness of unbinned values.
void Fill(const A &...args)
Fill an entry into this statistics object.
std::vector< RDimensionStats > fDimensionStats
The sums per dimension.
void Clear()
Clear this statistics object.
void FillImpl(const std::tuple< A... > &args, double w)
void AddAtomic(const RHistStats &other)
Add all entries from another statistics object using atomic instructions.
double fSumW
The sum of weights.
std::size_t GetNDimensions() const
const RDimensionStats & GetDimensionStats(std::size_t dim=0) const
Get the statistics object for one dimension.
double fSumW2
The sum of weights squared.
void Scale(double factor)
Scale the histogram statistics.
void Fill(const std::tuple< A... > &args)
Fill an entry into this statistics object.
double ComputeNEffectiveEntries() const
Compute the number of effective entries.
void DisableDimension(std::size_t dim)
Disable one dimension of this statistics object.
double ComputeVariance(std::size_t dim=0) const
Compute the variance of unbinned values.
void Streamer(TBuffer &)
ROOT Streamer function to throw when trying to store an object of this class.
std::uint64_t GetNEntries() const
double ComputeMean(std::size_t dim=0) const
Compute the arithmetic mean of unbinned values.
double ComputeKurtosis(std::size_t dim=0) const
Compute the (excess) kurtosis of unbinned values.
RHistStats(std::size_t nDimensions)
Construct a statistics object.
bool IsEnabled(std::size_t dim) const
std::uint64_t fNEntries
The number of entries.
double ComputeStdDev(std::size_t dim=0) const
Compute the standard deviation of unbinned values.
Buffer base class used for serializing objects.
Definition TBuffer.h:43
Double_t x[n]
Definition legend1.C:17
#define I(x, y, z)
std::enable_if_t< std::is_integral_v< T > > AtomicAdd(T *ptr, T val)
void AddAtomic(const RDimensionStats &other)
Add another statistics object using atomic instructions.
void Add(const RDimensionStats &other)
A weight for filling histograms.
Definition RWeight.hxx:17