Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RHistStats.hxx
Go to the documentation of this file.
1/// \file
2/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
3/// Feedback is welcome!
4
5#ifndef ROOT_RHistStats
6#define ROOT_RHistStats
7
8#include "RHistUtils.hxx"
10#include "RWeight.hxx"
11
12#include <cmath>
13#include <cstddef>
14#include <cstdint>
15#include <stdexcept>
16#include <tuple>
17#include <vector>
18
19class TBuffer;
20
21namespace ROOT {
22namespace Experimental {
23
24/**
25Histogram statistics of unbinned values.
26
27Every call to \ref Fill(const A &... args) "Fill" updates sums to compute the number of effective entries as well as the
28arithmetic mean and other statistical quantities per dimension:
29\code
30ROOT::Experimental::RHistStats stats(1);
31stats.Fill(8.5);
32stats.Fill(1.5);
33// stats.ComputeMean() will return 5.0
34\endcode
35
36\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
37Feedback is welcome!
38*/
40public:
41 /// Statistics for one dimension.
43 double fSumWX = 0.0;
44 double fSumWX2 = 0.0;
45 double fSumWX3 = 0.0;
46 double fSumWX4 = 0.0;
47
48 void Add(double x)
49 {
50 fSumWX += x;
51 fSumWX2 += x * x;
52 fSumWX3 += x * x * x;
53 fSumWX4 += x * x * x * x;
54 }
55
56 void Add(double x, double w)
57 {
58 fSumWX += w * x;
59 fSumWX2 += w * x * x;
60 fSumWX3 += w * x * x * x;
61 fSumWX4 += w * x * x * x * x;
62 }
63
65 {
66 fSumWX += other.fSumWX;
67 fSumWX2 += other.fSumWX2;
68 fSumWX3 += other.fSumWX3;
69 fSumWX4 += other.fSumWX4;
70 }
71
72 /// Add another statistics object using atomic instructions.
73 ///
74 /// \param[in] other another statistics object that must not be modified during the operation
82
83 void Clear()
84 {
85 fSumWX = 0.0;
86 fSumWX2 = 0.0;
87 fSumWX3 = 0.0;
88 fSumWX4 = 0.0;
89 }
90
91 void Scale(double factor)
92 {
93 fSumWX *= factor;
94 fSumWX2 *= factor;
95 fSumWX3 *= factor;
96 fSumWX4 *= factor;
97 }
98 };
99
100private:
101 /// The number of entries
102 std::uint64_t fNEntries = 0;
103 /// The sum of weights
104 double fSumW = 0.0;
105 /// The sum of weights squared
106 double fSumW2 = 0.0;
107 /// The sums per dimension
108 std::vector<RDimensionStats> fDimensionStats;
109
110public:
111 /// Construct a statistics object.
112 ///
113 /// \param[in] nDimensions the number of dimensions, must be > 0
114 explicit RHistStats(std::size_t nDimensions)
115 {
116 if (nDimensions == 0) {
117 throw std::invalid_argument("nDimensions must be > 0");
118 }
120 }
121
122 std::size_t GetNDimensions() const { return fDimensionStats.size(); }
123
124 std::uint64_t GetNEntries() const { return fNEntries; }
125 double GetSumW() const { return fSumW; }
126 double GetSumW2() const { return fSumW2; }
127
128 const RDimensionStats &GetDimensionStats(std::size_t dim = 0) const { return fDimensionStats.at(dim); }
129
130 /// Add all entries from another statistics object.
131 ///
132 /// Throws an exception if the number of dimensions are not identical.
133 ///
134 /// \param[in] other another statistics object
135 void Add(const RHistStats &other)
136 {
137 if (fDimensionStats.size() != other.fDimensionStats.size()) {
138 throw std::invalid_argument("number of dimensions not identical in Add");
139 }
140 fNEntries += other.fNEntries;
141 fSumW += other.fSumW;
142 fSumW2 += other.fSumW2;
143 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
144 fDimensionStats[i].Add(other.fDimensionStats[i]);
145 }
146 }
147
148 /// Add all entries from another statistics object using atomic instructions.
149 ///
150 /// Throws an exception if the number of dimensions are not identical.
151 ///
152 /// \param[in] other another statistics object that must not be modified during the operation
154 {
155 if (fDimensionStats.size() != other.fDimensionStats.size()) {
156 throw std::invalid_argument("number of dimensions not identical in Add");
157 }
161 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
162 fDimensionStats[i].AddAtomic(other.fDimensionStats[i]);
163 }
164 }
165
166 /// Clear this statistics object.
167 void Clear()
168 {
169 fNEntries = 0;
170 fSumW = 0;
171 fSumW2 = 0;
172 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
173 fDimensionStats[i].Clear();
174 }
175 }
176
177 /// Compute the number of effective entries.
178 ///
179 /// \f[
180 /// \frac{(\sum w_i)^2}{\sum w_i^2}
181 /// \f]
182 ///
183 /// \return the number of effective entries
185 {
186 if (fSumW2 == 0) {
187 return 0;
188 }
189 return fSumW * fSumW / fSumW2;
190 }
191
192 /// Compute the arithmetic mean of unbinned values.
193 ///
194 /// \f[
195 /// \mu = \frac{\sum w_i \cdot x_i}{\sum w_i}
196 /// \f]
197 ///
198 /// \param[in] dim the dimension index, starting at 0
199 /// \return the arithmetic mean of unbinned values
200 double ComputeMean(std::size_t dim = 0) const
201 {
202 // First get the statistics, which includes checking the argument.
203 auto &stats = fDimensionStats.at(dim);
204 if (fSumW == 0) {
205 return 0;
206 }
207 return stats.fSumWX / fSumW;
208 }
209
210 /// Compute the variance of unbinned values.
211 ///
212 /// This function computes the uncorrected sample variance:
213 /// \f[
214 /// \sigma^2 = \frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2
215 /// \f]
216 /// With some rewriting, this is equivalent to:
217 /// \f[
218 /// \sigma^2 = \frac{\sum w_i \cdot x_i^2}{\sum w_i} - \mu^2
219 /// \f]
220 ///
221 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
222 /// In other words, the return value is a biased estimation lower than the actual population variance.
223 ///
224 /// \param[in] dim the dimension index, starting at 0
225 /// \return the variance of unbinned values
226 double ComputeVariance(std::size_t dim = 0) const
227 {
228 // First get the statistics, which includes checking the argument.
229 auto &stats = fDimensionStats.at(dim);
230 if (fSumW == 0) {
231 return 0;
232 }
233 double mean = ComputeMean(dim);
234 return stats.fSumWX2 / fSumW - mean * mean;
235 }
236
237 /// Compute the standard deviation of unbinned values.
238 ///
239 /// This function computes the uncorrected sample standard deviation:
240 /// \f[
241 /// \sigma = \sqrt{\frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2}
242 /// \f]
243 /// With some rewriting, this is equivalent to:
244 /// \f[
245 /// \sigma = \sqrt{\frac{\sum w_i \cdot x_i^2}{\sum w_i} - \frac{(\sum w_i \cdot x_i)^2}{(\sum w_i)^2}}
246 /// \f]
247 ///
248 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
249 /// In other words, the return value is a biased estimation lower than the actual population standard deviation.
250 ///
251 /// \param[in] dim the dimension index, starting at 0
252 /// \return the standard deviation of unbinned values
253 double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
254
255 // clang-format off
256 /// Compute the skewness of unbinned values.
257 ///
258 /// The skewness is the third standardized moment:
259 /// \f[
260 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
261 /// \f]
262 /// With support for weighted filling and after some rewriting, it is computed as:
263 /// \f[
264 /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
265 /// \f]
266 ///
267 /// \param[in] dim the dimension index, starting at 0
268 /// \return the skewness of unbinned values
269 // clang-format on
270 double ComputeSkewness(std::size_t dim = 0) const
271 {
272 // First get the statistics, which includes checking the argument.
273 auto &stats = fDimensionStats.at(dim);
274 if (fSumW == 0) {
275 return 0;
276 }
277 double mean = ComputeMean(dim);
278 double var = ComputeVariance(dim);
279 if (var == 0) {
280 return 0;
281 }
282 double EWX3 = stats.fSumWX3 / fSumW;
283 double EWX2 = stats.fSumWX2 / fSumW;
284 return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5);
285 }
286
287 // clang-format off
288 /// Compute the (excess) kurtosis of unbinned values.
289 ///
290 /// The kurtosis is based on the fourth standardized moment:
291 /// \f[
292 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
293 /// \f]
294 /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
295 /// \f[
296 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
297 /// \f]
298 ///
299 /// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as:
300 /// \f[
301 /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
302 /// \f]
303 ///
304 /// \param[in] dim the dimension index, starting at 0
305 /// \return the (excess) kurtosis of unbinned values
306 // clang-format on
307 double ComputeKurtosis(std::size_t dim = 0) const
308 {
309 // First get the statistics, which includes checking the argument.
310 auto &stats = fDimensionStats.at(dim);
311 if (fSumW == 0) {
312 return 0;
313 }
314 double mean = ComputeMean(dim);
315 double var = ComputeVariance(dim);
316 if (var == 0) {
317 return 0;
318 }
319 double EWX4 = stats.fSumWX4 / fSumW;
320 double EWX3 = stats.fSumWX3 / fSumW;
321 double EWX2 = stats.fSumWX2 / fSumW;
322 return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3;
323 }
324
325private:
326 template <std::size_t I, typename... A>
327 void FillImpl(const std::tuple<A...> &args)
328 {
329 fDimensionStats[I].Add(std::get<I>(args));
330 if constexpr (I + 1 < sizeof...(A)) {
331 FillImpl<I + 1>(args);
332 }
333 }
334
335 template <std::size_t I, std::size_t N, typename... A>
336 void FillImpl(const std::tuple<A...> &args, double w)
337 {
338 fDimensionStats[I].Add(std::get<I>(args), w);
339 if constexpr (I + 1 < N) {
340 FillImpl<I + 1, N>(args, w);
341 }
342 }
343
344public:
345 /// Fill an entry into this statistics object.
346 ///
347 /// \code
348 /// ROOT::Experimental::RHistStats stats(2);
349 /// auto args = std::make_tuple(8.5, 10.5);
350 /// stats.Fill(args);
351 /// \endcode
352 ///
353 /// Throws an exception if the number of arguments does not match the number of dimensions.
354 ///
355 /// \param[in] args the arguments for each dimension
356 /// \par See also
357 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
358 /// \ref Fill(const std::tuple<A...> &args, RWeight weight) "overload for weighted filling"
359 template <typename... A>
360 void Fill(const std::tuple<A...> &args)
361 {
362 if (sizeof...(A) != fDimensionStats.size()) {
363 throw std::invalid_argument("invalid number of arguments to Fill");
364 }
365 fNEntries++;
366 fSumW++;
367 fSumW2++;
368 FillImpl<0>(args);
369 }
370
371 /// Fill an entry into this statistics object with a weight.
372 ///
373 /// \code
374 /// ROOT::Experimental::RHistStats stats(2);
375 /// auto args = std::make_tuple(8.5, 10.5);
376 /// stats.Fill(args, ROOT::Experimental::RWeight(0.8));
377 /// \endcode
378 ///
379 /// \param[in] args the arguments for each dimension
380 /// \param[in] weight the weight for this entry
381 /// \par See also
382 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
383 /// \ref Fill(const std::tuple<A...> &args) "overload for unweighted filling"
384 template <typename... A>
385 void Fill(const std::tuple<A...> &args, RWeight weight)
386 {
387 if (sizeof...(A) != fDimensionStats.size()) {
388 throw std::invalid_argument("invalid number of arguments to Fill");
389 }
390 fNEntries++;
391 double w = weight.fValue;
392 fSumW += w;
393 fSumW2 += w * w;
394 FillImpl<0, sizeof...(A)>(args, w);
395 }
396
397 /// Fill an entry into this statistics object.
398 ///
399 /// \code
400 /// ROOT::Experimental::RHistStats stats(2);
401 /// stats.Fill(8.5, 10.5);
402 /// \endcode
403 /// For weighted filling, pass an RWeight as the last argument:
404 /// \code
405 /// ROOT::Experimental::RHistStats stats(2);
406 /// stats.Fill(8.5, 10.5, ROOT::Experimental::RWeight(0.8));
407 /// \endcode
408 ///
409 /// Throws an exception if the number of arguments does not match the number of dimensions.
410 ///
411 /// \param[in] args the arguments for each dimension
412 /// \par See also
413 /// the function overloads accepting `std::tuple` \ref Fill(const std::tuple<A...> &args) "for unweighted filling"
414 /// and \ref Fill(const std::tuple<A...> &args, RWeight) "for weighted filling"
415 template <typename... A>
416 void Fill(const A &...args)
417 {
418 auto t = std::forward_as_tuple(args...);
419 if constexpr (std::is_same_v<typename Internal::LastType<A...>::type, RWeight>) {
420 static constexpr std::size_t N = sizeof...(A) - 1;
421 if (N != fDimensionStats.size()) {
422 throw std::invalid_argument("invalid number of arguments to Fill");
423 }
424 fNEntries++;
425 double w = std::get<N>(t).fValue;
426 fSumW += w;
427 fSumW2 += w * w;
428 FillImpl<0, N>(t, w);
429 } else {
430 Fill(t);
431 }
432 }
433
434 /// Scale the histogram statistics.
435 ///
436 /// \param[in] factor the scale factor
437 void Scale(double factor)
438 {
439 fSumW *= factor;
440 fSumW2 *= factor * factor;
441 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
442 fDimensionStats[i].Scale(factor);
443 }
444 }
445
446 /// %ROOT Streamer function to throw when trying to store an object of this class.
447 void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistStats"); }
448};
449
450} // namespace Experimental
451} // namespace ROOT
452
453#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Histogram statistics of unbinned values.
void Add(const RHistStats &other)
Add all entries from another statistics object.
void Fill(const std::tuple< A... > &args, RWeight weight)
Fill an entry into this statistics object with a weight.
void FillImpl(const std::tuple< A... > &args)
double ComputeSkewness(std::size_t dim=0) const
Compute the skewness of unbinned values.
void Fill(const A &...args)
Fill an entry into this statistics object.
std::vector< RDimensionStats > fDimensionStats
The sums per dimension.
void Clear()
Clear this statistics object.
void FillImpl(const std::tuple< A... > &args, double w)
void AddAtomic(const RHistStats &other)
Add all entries from another statistics object using atomic instructions.
double fSumW
The sum of weights.
std::size_t GetNDimensions() const
const RDimensionStats & GetDimensionStats(std::size_t dim=0) const
double fSumW2
The sum of weights squared.
void Scale(double factor)
Scale the histogram statistics.
void Fill(const std::tuple< A... > &args)
Fill an entry into this statistics object.
double ComputeNEffectiveEntries() const
Compute the number of effective entries.
double ComputeVariance(std::size_t dim=0) const
Compute the variance of unbinned values.
void Streamer(TBuffer &)
ROOT Streamer function to throw when trying to store an object of this class.
std::uint64_t GetNEntries() const
double ComputeMean(std::size_t dim=0) const
Compute the arithmetic mean of unbinned values.
double ComputeKurtosis(std::size_t dim=0) const
Compute the (excess) kurtosis of unbinned values.
RHistStats(std::size_t nDimensions)
Construct a statistics object.
std::uint64_t fNEntries
The number of entries.
double ComputeStdDev(std::size_t dim=0) const
Compute the standard deviation of unbinned values.
Buffer base class used for serializing objects.
Definition TBuffer.h:43
Double_t x[n]
Definition legend1.C:17
#define I(x, y, z)
std::enable_if_t< std::is_integral_v< T > > AtomicAdd(T *ptr, T val)
void AddAtomic(const RDimensionStats &other)
Add another statistics object using atomic instructions.
void Add(const RDimensionStats &other)
A weight for filling histograms.
Definition RWeight.hxx:17