Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RHistStats.hxx
Go to the documentation of this file.
1/// \file
2/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
3/// Feedback is welcome!
4
5#ifndef ROOT_RHistStats
6#define ROOT_RHistStats
7
8#include "RHistUtils.hxx"
9#include "RWeight.hxx"
10
11#include <cassert>
12#include <cmath>
13#include <cstddef>
14#include <cstdint>
15#include <limits>
16#include <stdexcept>
17#include <tuple>
18#include <type_traits>
19#include <vector>
20
21class TBuffer;
22
23namespace ROOT {
24namespace Experimental {
25
26// forward declaration for friend declaration
27template <typename T>
28class RHist;
29
30/**
31Histogram statistics of unbinned values.
32
33Every call to \ref Fill(const A &... args) "Fill" updates sums to compute the number of effective entries as well as the
34arithmetic mean and other statistical quantities per dimension:
35\code
36ROOT::Experimental::RHistStats stats(1);
37stats.Fill(8.5);
38stats.Fill(1.5);
39// stats.ComputeMean() will return 5.0
40\endcode
41
42\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
43Feedback is welcome!
44*/
46 template <typename T>
47 friend class RHist;
48
49public:
50 /// Statistics for one dimension.
52 bool fEnabled = true;
53 double fSumWX = 0.0;
54 double fSumWX2 = 0.0;
55 double fSumWX3 = 0.0;
56 double fSumWX4 = 0.0;
57
58 void Add(double x)
59 {
61 fSumWX += x;
62 fSumWX2 += x * x;
63 fSumWX3 += x * x * x;
64 fSumWX4 += x * x * x * x;
65 }
66
67 void Add(double x, double w)
68 {
70 fSumWX += w * x;
71 fSumWX2 += w * x * x;
72 fSumWX3 += w * x * x * x;
73 fSumWX4 += w * x * x * x * x;
74 }
75
77 {
79 fSumWX += other.fSumWX;
80 fSumWX2 += other.fSumWX2;
81 fSumWX3 += other.fSumWX3;
82 fSumWX4 += other.fSumWX4;
83 }
84
85 /// Add another statistics object using atomic instructions.
86 ///
87 /// \param[in] other another statistics object that must not be modified during the operation
96
97 void Clear()
98 {
99 fSumWX = 0.0;
100 fSumWX2 = 0.0;
101 fSumWX3 = 0.0;
102 fSumWX4 = 0.0;
103 }
104
105 void Scale(double factor)
106 {
108 fSumWX *= factor;
109 fSumWX2 *= factor;
110 fSumWX3 *= factor;
111 fSumWX4 *= factor;
112 }
113 };
114
115private:
116 /// The number of entries
117 std::uint64_t fNEntries = 0;
118 /// The sum of weights
119 double fSumW = 0.0;
120 /// The sum of weights squared
121 double fSumW2 = 0.0;
122 /// The sums per dimension
123 std::vector<RDimensionStats> fDimensionStats;
124 /// Whether this object is tainted, in which case read access will throw. This is used if a user modifies bin
125 /// contents explicitly or slices histograms without preserving all entries, for example.
126 bool fTainted = false;
127
128 void ThrowIfTainted() const
129 {
130 if (fTainted) {
131 throw std::logic_error("statistics are tainted, for example after setting bin contents or slicing");
132 }
133 }
134
135public:
136 /// Construct a statistics object.
137 ///
138 /// \param[in] nDimensions the number of dimensions, must be > 0
139 explicit RHistStats(std::size_t nDimensions)
140 {
141 if (nDimensions == 0) {
142 throw std::invalid_argument("nDimensions must be > 0");
143 }
145 }
146
147 std::size_t GetNDimensions() const { return fDimensionStats.size(); }
148
149 std::uint64_t GetNEntries() const
150 {
152 return fNEntries;
153 }
154 double GetSumW() const
155 {
157 return fSumW;
158 }
159 double GetSumW2() const
160 {
162 return fSumW2;
163 }
164
165 /// Get the statistics object for one dimension.
166 ///
167 /// Throws an exception if the dimension is disabled.
168 ///
169 /// \param[in] dim the dimension index, starting at 0
170 /// \return the statistics object
171 const RDimensionStats &GetDimensionStats(std::size_t dim = 0) const
172 {
174
175 const RDimensionStats &stats = fDimensionStats.at(dim);
176 if (!stats.fEnabled) {
177 throw std::invalid_argument("dimension is disabled");
178 }
179 return stats;
180 }
181
182 /// Disable one dimension of this statistics object.
183 ///
184 /// All future calls to Fill will ignore the corresponding argument. Once disabled, a dimension cannot be reenabled.
185 ///
186 /// \param[in] dim the dimension index, starting at 0
187 void DisableDimension(std::size_t dim) { fDimensionStats.at(dim).fEnabled = false; }
188
189 bool IsEnabled(std::size_t dim) const { return fDimensionStats.at(dim).fEnabled; }
190
191 /// Taint this statistics object.
192 ///
193 /// It can still be filled, but any read access will throw until Clear() is called.
194 void Taint() { fTainted = true; }
195
196 bool IsTainted() const { return fTainted; }
197
198 /// Add all entries from another statistics object.
199 ///
200 /// Throws an exception if the number of dimensions are not identical.
201 ///
202 /// \param[in] other another statistics object
203 void Add(const RHistStats &other)
204 {
205 // NB: this method does *not* call ThrowIfTainted() to allow adding RHist which may contain a tainted statistics
206 // object.
207 if (fDimensionStats.size() != other.fDimensionStats.size()) {
208 throw std::invalid_argument("number of dimensions not identical in Add");
209 }
210 // For exception safety, first check all dimensions before modifying the object.
211 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
212 if (fDimensionStats[i].fEnabled != other.fDimensionStats[i].fEnabled) {
213 throw std::invalid_argument("the same dimensions must be enabled to combine statistics with Add");
214 }
215 }
216
217 fNEntries += other.fNEntries;
218 fSumW += other.fSumW;
219 fSumW2 += other.fSumW2;
220 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
221 if (fDimensionStats[i].fEnabled) {
222 fDimensionStats[i].Add(other.fDimensionStats[i]);
223 }
224 }
225 fTainted |= other.fTainted;
226 }
227
228 /// Add all entries from another statistics object using atomic instructions.
229 ///
230 /// Throws an exception if the number of dimensions are not identical.
231 ///
232 /// \param[in] other another statistics object that must not be modified during the operation
234 {
235 // NB: this method does *not* call ThrowIfTainted() to allow adding RHist which may contain a tainted statistics
236 // object.
237 if (fDimensionStats.size() != other.fDimensionStats.size()) {
238 throw std::invalid_argument("number of dimensions not identical in AddAtomic");
239 }
240 // For exception safety, first check all dimensions before modifying the object.
241 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
242 if (fDimensionStats[i].fEnabled != other.fDimensionStats[i].fEnabled) {
243 throw std::invalid_argument("the same dimensions must be enabled to combine statistics with AddAtomic");
244 }
245 }
246
250 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
251 if (fDimensionStats[i].fEnabled) {
252 fDimensionStats[i].AddAtomic(other.fDimensionStats[i]);
253 }
254 }
255 fTainted |= other.fTainted;
256 }
257
258 /// Clear this statistics object.
259 void Clear()
260 {
261 fNEntries = 0;
262 fSumW = 0;
263 fSumW2 = 0;
264 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
265 fDimensionStats[i].Clear();
266 }
267 fTainted = false;
268 }
269
270 /// Compute the number of effective entries.
271 ///
272 /// \f[
273 /// \frac{(\sum w_i)^2}{\sum w_i^2}
274 /// \f]
275 ///
276 /// \return the number of effective entries
278 {
280 if (fSumW2 == 0) {
281 return std::numeric_limits<double>::signaling_NaN();
282 }
283 return fSumW * fSumW / fSumW2;
284 }
285
286 /// Compute the arithmetic mean of unbinned values.
287 ///
288 /// \f[
289 /// \mu = \frac{\sum w_i \cdot x_i}{\sum w_i}
290 /// \f]
291 ///
292 /// \param[in] dim the dimension index, starting at 0
293 /// \return the arithmetic mean of unbinned values
294 double ComputeMean(std::size_t dim = 0) const
295 {
296 // First get the statistics, which includes checking the argument.
297 auto &stats = GetDimensionStats(dim);
298 if (fSumW == 0) {
299 return std::numeric_limits<double>::signaling_NaN();
300 }
301 return stats.fSumWX / fSumW;
302 }
303
304 /// Compute the variance of unbinned values.
305 ///
306 /// This function computes the uncorrected sample variance:
307 /// \f[
308 /// \sigma^2 = \frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2
309 /// \f]
310 /// With some rewriting, this is equivalent to:
311 /// \f[
312 /// \sigma^2 = \frac{\sum w_i \cdot x_i^2}{\sum w_i} - \mu^2
313 /// \f]
314 ///
315 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
316 /// In other words, the return value is a biased estimation lower than the actual population variance.
317 ///
318 /// \param[in] dim the dimension index, starting at 0
319 /// \return the variance of unbinned values
320 double ComputeVariance(std::size_t dim = 0) const
321 {
322 // First get the statistics, which includes checking the argument.
323 auto &stats = GetDimensionStats(dim);
324 if (fSumW == 0) {
325 return std::numeric_limits<double>::signaling_NaN();
326 }
327 double mean = ComputeMean(dim);
328 return stats.fSumWX2 / fSumW - mean * mean;
329 }
330
331 /// Compute the standard deviation of unbinned values.
332 ///
333 /// This function computes the uncorrected sample standard deviation:
334 /// \f[
335 /// \sigma = \sqrt{\frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2}
336 /// \f]
337 /// With some rewriting, this is equivalent to:
338 /// \f[
339 /// \sigma = \sqrt{\frac{\sum w_i \cdot x_i^2}{\sum w_i} - \frac{(\sum w_i \cdot x_i)^2}{(\sum w_i)^2}}
340 /// \f]
341 ///
342 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
343 /// In other words, the return value is a biased estimation lower than the actual population standard deviation.
344 ///
345 /// \param[in] dim the dimension index, starting at 0
346 /// \return the standard deviation of unbinned values
347 double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
348
349 // clang-format off
350 /// Compute the skewness of unbinned values.
351 ///
352 /// The skewness is the third standardized moment:
353 /// \f[
354 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
355 /// \f]
356 /// With support for weighted filling and after some rewriting, it is computed as:
357 /// \f[
358 /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
359 /// \f]
360 ///
361 /// \param[in] dim the dimension index, starting at 0
362 /// \return the skewness of unbinned values
363 // clang-format on
364 double ComputeSkewness(std::size_t dim = 0) const
365 {
366 // First get the statistics, which includes checking the argument.
367 auto &stats = GetDimensionStats(dim);
368 if (fSumW == 0) {
369 return std::numeric_limits<double>::signaling_NaN();
370 }
371 double mean = ComputeMean(dim);
372 double var = ComputeVariance(dim);
373 if (var == 0) {
374 return std::numeric_limits<double>::signaling_NaN();
375 }
376 double EWX3 = stats.fSumWX3 / fSumW;
377 double EWX2 = stats.fSumWX2 / fSumW;
378 return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5);
379 }
380
381 // clang-format off
382 /// Compute the (excess) kurtosis of unbinned values.
383 ///
384 /// The kurtosis is based on the fourth standardized moment:
385 /// \f[
386 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
387 /// \f]
388 /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
389 /// \f[
390 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
391 /// \f]
392 ///
393 /// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as:
394 /// \f[
395 /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
396 /// \f]
397 ///
398 /// \param[in] dim the dimension index, starting at 0
399 /// \return the (excess) kurtosis of unbinned values
400 // clang-format on
401 double ComputeKurtosis(std::size_t dim = 0) const
402 {
403 // First get the statistics, which includes checking the argument.
404 auto &stats = GetDimensionStats(dim);
405 if (fSumW == 0) {
406 return std::numeric_limits<double>::signaling_NaN();
407 }
408 double mean = ComputeMean(dim);
409 double var = ComputeVariance(dim);
410 if (var == 0) {
411 return std::numeric_limits<double>::signaling_NaN();
412 }
413 double EWX4 = stats.fSumWX4 / fSumW;
414 double EWX3 = stats.fSumWX3 / fSumW;
415 double EWX2 = stats.fSumWX2 / fSumW;
416 return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3;
417 }
418
419private:
420 template <std::size_t I, std::size_t N, typename... A>
421 void CheckArguments(const std::tuple<A...> &args) const
422 {
423 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
424 if (fDimensionStats[I].fEnabled) {
425 if constexpr (!std::is_convertible_v<ArgumentType, double>) {
426 throw std::invalid_argument("invalid type of argument in RHistStats");
427 }
428 }
429 if constexpr (I + 1 < N) {
431 }
432 }
433
434 template <std::size_t I, typename... A>
435 void FillImpl(const std::tuple<A...> &args)
436 {
437 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
438 if (fDimensionStats[I].fEnabled) {
439 if constexpr (std::is_convertible_v<ArgumentType, double>) {
440 fDimensionStats[I].Add(std::get<I>(args));
441 } else {
442 // Should be checked in CheckArguments above!
443 assert(0); // GCOVR_EXCL_LINE
444 }
445 }
446 if constexpr (I + 1 < sizeof...(A)) {
447 FillImpl<I + 1>(args);
448 }
449 }
450
451 template <std::size_t I, std::size_t N, typename... A>
452 void FillImpl(const std::tuple<A...> &args, double w)
453 {
454 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
455 if (fDimensionStats[I].fEnabled) {
456 if constexpr (std::is_convertible_v<ArgumentType, double>) {
457 fDimensionStats[I].Add(std::get<I>(args), w);
458 } else {
459 // Avoid compiler warning about unused parameter...
460 (void)w;
461 // Should be checked in CheckArguments above!
462 assert(0); // GCOVR_EXCL_LINE
463 }
464 }
465 if constexpr (I + 1 < N) {
466 FillImpl<I + 1, N>(args, w);
467 }
468 }
469
470public:
471 /// Fill an entry into this statistics object.
472 ///
473 /// \code
474 /// ROOT::Experimental::RHistStats stats(2);
475 /// auto args = std::make_tuple(8.5, 10.5);
476 /// stats.Fill(args);
477 /// \endcode
478 ///
479 /// Throws an exception if the number of arguments does not match the number of dimensions.
480 ///
481 /// \param[in] args the arguments for each dimension
482 /// \par See also
483 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
484 /// \ref Fill(const std::tuple<A...> &args, RWeight weight) "overload for weighted filling"
485 template <typename... A>
486 void Fill(const std::tuple<A...> &args)
487 {
488 if (sizeof...(A) != fDimensionStats.size()) {
489 throw std::invalid_argument("invalid number of arguments to Fill");
490 }
491 // For exception safety, first check all arguments before modifying the object.
492 CheckArguments<0, sizeof...(A)>(args);
493
494 fNEntries++;
495 fSumW++;
496 fSumW2++;
497 FillImpl<0>(args);
498 }
499
500 /// Fill an entry into this statistics object with a weight.
501 ///
502 /// \code
503 /// ROOT::Experimental::RHistStats stats(2);
504 /// auto args = std::make_tuple(8.5, 10.5);
505 /// stats.Fill(args, ROOT::Experimental::RWeight(0.8));
506 /// \endcode
507 ///
508 /// \param[in] args the arguments for each dimension
509 /// \param[in] weight the weight for this entry
510 /// \par See also
511 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
512 /// \ref Fill(const std::tuple<A...> &args) "overload for unweighted filling"
513 template <typename... A>
514 void Fill(const std::tuple<A...> &args, RWeight weight)
515 {
516 if (sizeof...(A) != fDimensionStats.size()) {
517 throw std::invalid_argument("invalid number of arguments to Fill");
518 }
519 // For exception safety, first check all arguments before modifying the object.
520 CheckArguments<0, sizeof...(A)>(args);
521
522 fNEntries++;
523 double w = weight.fValue;
524 fSumW += w;
525 fSumW2 += w * w;
526 FillImpl<0, sizeof...(A)>(args, w);
527 }
528
529 /// Fill an entry into this statistics object.
530 ///
531 /// \code
532 /// ROOT::Experimental::RHistStats stats(2);
533 /// stats.Fill(8.5, 10.5);
534 /// \endcode
535 /// For weighted filling, pass an RWeight as the last argument:
536 /// \code
537 /// ROOT::Experimental::RHistStats stats(2);
538 /// stats.Fill(8.5, 10.5, ROOT::Experimental::RWeight(0.8));
539 /// \endcode
540 ///
541 /// Throws an exception if the number of arguments does not match the number of dimensions.
542 ///
543 /// \param[in] args the arguments for each dimension
544 /// \par See also
545 /// the function overloads accepting `std::tuple` \ref Fill(const std::tuple<A...> &args) "for unweighted filling"
546 /// and \ref Fill(const std::tuple<A...> &args, RWeight) "for weighted filling"
547 template <typename... A>
548 void Fill(const A &...args)
549 {
550 static_assert(sizeof...(A) >= 1, "need at least one argument to Fill");
551 if constexpr (sizeof...(A) >= 1) {
552 auto t = std::forward_as_tuple(args...);
553 if constexpr (std::is_same_v<typename Internal::LastType<A...>::type, RWeight>) {
554 static constexpr std::size_t N = sizeof...(A) - 1;
555 if (N != fDimensionStats.size()) {
556 throw std::invalid_argument("invalid number of arguments to Fill");
557 }
558 // For exception safety, first check all arguments before modifying the object.
560
561 fNEntries++;
562 double w = std::get<N>(t).fValue;
563 fSumW += w;
564 fSumW2 += w * w;
565 FillImpl<0, N>(t, w);
566 } else {
567 Fill(t);
568 }
569 }
570 }
571
572 /// Scale the histogram statistics.
573 ///
574 /// \param[in] factor the scale factor
575 void Scale(double factor)
576 {
577 // NB: this method does *not* call ThrowIfTainted() to allow scaling RHist which may contain a tainted statistics
578 // object.
579 fSumW *= factor;
580 fSumW2 *= factor * factor;
581 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
582 if (fDimensionStats[i].fEnabled) {
583 fDimensionStats[i].Scale(factor);
584 }
585 }
586 }
587
588 /// %ROOT Streamer function to throw when trying to store an object of this class.
589 void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistStats"); }
590};
591
592} // namespace Experimental
593} // namespace ROOT
594
595#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Histogram statistics of unbinned values.
void Add(const RHistStats &other)
Add all entries from another statistics object.
void Fill(const std::tuple< A... > &args, RWeight weight)
Fill an entry into this statistics object with a weight.
void FillImpl(const std::tuple< A... > &args)
double ComputeSkewness(std::size_t dim=0) const
Compute the skewness of unbinned values.
void Fill(const A &...args)
Fill an entry into this statistics object.
std::vector< RDimensionStats > fDimensionStats
The sums per dimension.
void Clear()
Clear this statistics object.
void Taint()
Taint this statistics object.
void FillImpl(const std::tuple< A... > &args, double w)
void AddAtomic(const RHistStats &other)
Add all entries from another statistics object using atomic instructions.
double fSumW
The sum of weights.
std::size_t GetNDimensions() const
const RDimensionStats & GetDimensionStats(std::size_t dim=0) const
Get the statistics object for one dimension.
double fSumW2
The sum of weights squared.
void Scale(double factor)
Scale the histogram statistics.
void Fill(const std::tuple< A... > &args)
Fill an entry into this statistics object.
double ComputeNEffectiveEntries() const
Compute the number of effective entries.
void DisableDimension(std::size_t dim)
Disable one dimension of this statistics object.
void CheckArguments(const std::tuple< A... > &args) const
double ComputeVariance(std::size_t dim=0) const
Compute the variance of unbinned values.
void Streamer(TBuffer &)
ROOT Streamer function to throw when trying to store an object of this class.
std::uint64_t GetNEntries() const
double ComputeMean(std::size_t dim=0) const
Compute the arithmetic mean of unbinned values.
double ComputeKurtosis(std::size_t dim=0) const
Compute the (excess) kurtosis of unbinned values.
RHistStats(std::size_t nDimensions)
Construct a statistics object.
bool IsEnabled(std::size_t dim) const
std::uint64_t fNEntries
The number of entries.
bool fTainted
Whether this object is tainted, in which case read access will throw.
double ComputeStdDev(std::size_t dim=0) const
Compute the standard deviation of unbinned values.
A histogram for aggregation of data along multiple dimensions.
Definition RHist.hxx:65
Buffer base class used for serializing objects.
Definition TBuffer.h:43
Double_t x[n]
Definition legend1.C:17
#define I(x, y, z)
std::enable_if_t< std::is_integral_v< T > > AtomicAdd(T *ptr, T val)
void AddAtomic(const RDimensionStats &other)
Add another statistics object using atomic instructions.
void Add(const RDimensionStats &other)
A weight for filling histograms.
Definition RWeight.hxx:17