Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RHistStats.hxx
Go to the documentation of this file.
1/// \file
2/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
3/// Feedback is welcome!
4
5#ifndef ROOT_RHistStats
6#define ROOT_RHistStats
7
8#include "RHistUtils.hxx"
10#include "RWeight.hxx"
11
12#include <cmath>
13#include <cstdint>
14#include <stdexcept>
15#include <tuple>
16#include <vector>
17
18class TBuffer;
19
20namespace ROOT {
21namespace Experimental {
22
23/**
24Histogram statistics of unbinned values.
25
26Every call to \ref Fill(const A &... args) "Fill" updates sums to compute the number of effective entries as well as the
27arithmetic mean and other statistical quantities per dimension:
28\code
29ROOT::Experimental::RHistStats stats(1);
30stats.Fill(8.5);
31stats.Fill(1.5);
32// stats.ComputeMean() will return 5.0
33\endcode
34
35\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
36Feedback is welcome!
37*/
39public:
40 /// Statistics for one dimension.
42 double fSumWX = 0.0;
43 double fSumWX2 = 0.0;
44 double fSumWX3 = 0.0;
45 double fSumWX4 = 0.0;
46
47 void Add(double x)
48 {
49 fSumWX += x;
50 fSumWX2 += x * x;
51 fSumWX3 += x * x * x;
52 fSumWX4 += x * x * x * x;
53 }
54
55 void Add(double x, double w)
56 {
57 fSumWX += w * x;
58 fSumWX2 += w * x * x;
59 fSumWX3 += w * x * x * x;
60 fSumWX4 += w * x * x * x * x;
61 }
62
64 {
65 fSumWX += other.fSumWX;
66 fSumWX2 += other.fSumWX2;
67 fSumWX3 += other.fSumWX3;
68 fSumWX4 += other.fSumWX4;
69 }
70
71 void Clear()
72 {
73 fSumWX = 0.0;
74 fSumWX2 = 0.0;
75 fSumWX3 = 0.0;
76 fSumWX4 = 0.0;
77 }
78 };
79
80private:
81 /// The number of entries
82 std::uint64_t fNEntries = 0;
83 /// The sum of weights
84 double fSumW = 0.0;
85 /// The sum of weights squared
86 double fSumW2 = 0.0;
87 /// The sums per dimension
88 std::vector<RDimensionStats> fDimensionStats;
89
90public:
91 /// Construct a statistics object.
92 ///
93 /// \param[in] nDimensions the number of dimensions, must be > 0
94 explicit RHistStats(std::size_t nDimensions)
95 {
96 if (nDimensions == 0) {
97 throw std::invalid_argument("nDimensions must be > 0");
98 }
100 }
101
102 std::size_t GetNDimensions() const { return fDimensionStats.size(); }
103
104 std::uint64_t GetNEntries() const { return fNEntries; }
105 double GetSumW() const { return fSumW; }
106 double GetSumW2() const { return fSumW2; }
107
108 const RDimensionStats &GetDimensionStats(std::size_t dim = 0) const { return fDimensionStats.at(dim); }
109
110 /// Add all entries from another statistics object.
111 ///
112 /// Throws an exception if the number of dimensions are not identical.
113 ///
114 /// \param[in] other another statistics object
115 void Add(const RHistStats &other)
116 {
117 if (fDimensionStats.size() != other.fDimensionStats.size()) {
118 throw std::invalid_argument("number of dimensions not identical in Add");
119 }
120 fNEntries += other.fNEntries;
121 fSumW += other.fSumW;
122 fSumW2 += other.fSumW2;
123 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
124 fDimensionStats[i].Add(other.fDimensionStats[i]);
125 }
126 }
127
128 /// Clear this statistics object.
129 void Clear()
130 {
131 fNEntries = 0;
132 fSumW = 0;
133 fSumW2 = 0;
134 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
135 fDimensionStats[i].Clear();
136 }
137 }
138
139 /// Compute the number of effective entries.
140 ///
141 /// \f[
142 /// \frac{(\sum w_i)^2}{\sum w_i^2}
143 /// \f]
144 ///
145 /// \return the number of effective entries
147 {
148 if (fSumW2 == 0) {
149 return 0;
150 }
151 return fSumW * fSumW / fSumW2;
152 }
153
154 /// Compute the arithmetic mean of unbinned values.
155 ///
156 /// \f[
157 /// \mu = \frac{\sum w_i \cdot x_i}{\sum w_i}
158 /// \f]
159 ///
160 /// \param[in] dim the dimension index, starting at 0
161 /// \return the arithmetic mean of unbinned values
162 double ComputeMean(std::size_t dim = 0) const
163 {
164 // First get the statistics, which includes checking the argument.
165 auto &stats = fDimensionStats.at(dim);
166 if (fSumW == 0) {
167 return 0;
168 }
169 return stats.fSumWX / fSumW;
170 }
171
172 /// Compute the variance of unbinned values.
173 ///
174 /// This function computes the uncorrected sample variance:
175 /// \f[
176 /// \sigma^2 = \frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2
177 /// \f]
178 /// With some rewriting, this is equivalent to:
179 /// \f[
180 /// \sigma^2 = \frac{\sum w_i \cdot x_i^2}{\sum w_i} - \mu^2
181 /// \f]
182 ///
183 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
184 /// In other words, the return value is a biased estimation lower than the actual population variance.
185 ///
186 /// \param[in] dim the dimension index, starting at 0
187 /// \return the variance of unbinned values
188 double ComputeVariance(std::size_t dim = 0) const
189 {
190 // First get the statistics, which includes checking the argument.
191 auto &stats = fDimensionStats.at(dim);
192 if (fSumW == 0) {
193 return 0;
194 }
195 double mean = ComputeMean(dim);
196 return stats.fSumWX2 / fSumW - mean * mean;
197 }
198
199 /// Compute the standard deviation of unbinned values.
200 ///
201 /// This function computes the uncorrected sample standard deviation:
202 /// \f[
203 /// \sigma = \sqrt{\frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2}
204 /// \f]
205 /// With some rewriting, this is equivalent to:
206 /// \f[
207 /// \sigma = \sqrt{\frac{\sum w_i \cdot x_i^2}{\sum w_i} - \frac{(\sum w_i \cdot x_i)^2}{(\sum w_i)^2}}
208 /// \f]
209 ///
210 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
211 /// In other words, the return value is a biased estimation lower than the actual population standard deviation.
212 ///
213 /// \param[in] dim the dimension index, starting at 0
214 /// \return the standard deviation of unbinned values
215 double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
216
217 // clang-format off
218 /// Compute the skewness of unbinned values.
219 ///
220 /// The skewness is the third standardized moment:
221 /// \f[
222 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
223 /// \f]
224 /// With support for weighted filling and after some rewriting, it is computed as:
225 /// \f[
226 /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
227 /// \f]
228 ///
229 /// \param[in] dim the dimension index, starting at 0
230 /// \return the skewness of unbinned values
231 // clang-format on
232 double ComputeSkewness(std::size_t dim = 0) const
233 {
234 // First get the statistics, which includes checking the argument.
235 auto &stats = fDimensionStats.at(dim);
236 if (fSumW == 0) {
237 return 0;
238 }
239 double mean = ComputeMean(dim);
240 double var = ComputeVariance(dim);
241 if (var == 0) {
242 return 0;
243 }
244 double EWX3 = stats.fSumWX3 / fSumW;
245 double EWX2 = stats.fSumWX2 / fSumW;
246 return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5);
247 }
248
249 // clang-format off
250 /// Compute the (excess) kurtosis of unbinned values.
251 ///
252 /// The kurtosis is based on the fourth standardized moment:
253 /// \f[
254 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
255 /// \f]
256 /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
257 /// \f[
258 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
259 /// \f]
260 ///
261 /// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as:
262 /// \f[
263 /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
264 /// \f]
265 ///
266 /// \param[in] dim the dimension index, starting at 0
267 /// \return the (excess) kurtosis of unbinned values
268 // clang-format on
269 double ComputeKurtosis(std::size_t dim = 0) const
270 {
271 // First get the statistics, which includes checking the argument.
272 auto &stats = fDimensionStats.at(dim);
273 if (fSumW == 0) {
274 return 0;
275 }
276 double mean = ComputeMean(dim);
277 double var = ComputeVariance(dim);
278 if (var == 0) {
279 return 0;
280 }
281 double EWX4 = stats.fSumWX4 / fSumW;
282 double EWX3 = stats.fSumWX3 / fSumW;
283 double EWX2 = stats.fSumWX2 / fSumW;
284 return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3;
285 }
286
287private:
288 template <std::size_t I, typename... A>
289 void FillImpl(const std::tuple<A...> &args)
290 {
291 fDimensionStats[I].Add(std::get<I>(args));
292 if constexpr (I + 1 < sizeof...(A)) {
293 FillImpl<I + 1>(args);
294 }
295 }
296
297 template <std::size_t I, std::size_t N, typename... A>
298 void FillImpl(const std::tuple<A...> &args, double w)
299 {
300 fDimensionStats[I].Add(std::get<I>(args), w);
301 if constexpr (I + 1 < N) {
302 FillImpl<I + 1, N>(args, w);
303 }
304 }
305
306public:
307 /// Fill an entry into this statistics object.
308 ///
309 /// \code
310 /// ROOT::Experimental::RHistStats stats(2);
311 /// auto args = std::make_tuple(8.5, 10.5);
312 /// stats.Fill(args);
313 /// \endcode
314 ///
315 /// Throws an exception if the number of arguments does not match the number of dimensions.
316 ///
317 /// \param[in] args the arguments for each dimension
318 /// \par See also
319 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
320 /// \ref Fill(const std::tuple<A...> &args, RWeight weight) "overload for weighted filling"
321 template <typename... A>
322 void Fill(const std::tuple<A...> &args)
323 {
324 if (sizeof...(A) != fDimensionStats.size()) {
325 throw std::invalid_argument("invalid number of arguments to Fill");
326 }
327 fNEntries++;
328 fSumW++;
329 fSumW2++;
330 FillImpl<0>(args);
331 }
332
333 /// Fill an entry into this statistics object with a weight.
334 ///
335 /// \code
336 /// ROOT::Experimental::RHistStats stats(2);
337 /// auto args = std::make_tuple(8.5, 10.5);
338 /// stats.Fill(args, ROOT::Experimental::RWeight(0.8));
339 /// \endcode
340 ///
341 /// \param[in] args the arguments for each dimension
342 /// \param[in] weight the weight for this entry
343 /// \par See also
344 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
345 /// \ref Fill(const std::tuple<A...> &args) "overload for unweighted filling"
346 template <typename... A>
347 void Fill(const std::tuple<A...> &args, RWeight weight)
348 {
349 if (sizeof...(A) != fDimensionStats.size()) {
350 throw std::invalid_argument("invalid number of arguments to Fill");
351 }
352 fNEntries++;
353 double w = weight.fValue;
354 fSumW += w;
355 fSumW2 += w * w;
356 FillImpl<0, sizeof...(A)>(args, w);
357 }
358
359 /// Fill an entry into this statistics object.
360 ///
361 /// \code
362 /// ROOT::Experimental::RHistStats stats(2);
363 /// stats.Fill(8.5, 10.5);
364 /// \endcode
365 /// For weighted filling, pass an RWeight as the last argument:
366 /// \code
367 /// ROOT::Experimental::RHistStats stats(2);
368 /// stats.Fill(8.5, 10.5, ROOT::Experimental::RWeight(0.8));
369 /// \endcode
370 ///
371 /// Throws an exception if the number of arguments does not match the number of dimensions.
372 ///
373 /// \param[in] args the arguments for each dimension
374 /// \par See also
375 /// the function overloads accepting `std::tuple` \ref Fill(const std::tuple<A...> &args) "for unweighted filling"
376 /// and \ref Fill(const std::tuple<A...> &args, RWeight) "for weighted filling"
377 template <typename... A>
378 void Fill(const A &...args)
379 {
380 auto t = std::forward_as_tuple(args...);
381 if constexpr (std::is_same_v<typename Internal::LastType<A...>::type, RWeight>) {
382 static constexpr std::size_t N = sizeof...(A) - 1;
383 if (N != fDimensionStats.size()) {
384 throw std::invalid_argument("invalid number of arguments to Fill");
385 }
386 fNEntries++;
387 double w = std::get<N>(t).fValue;
388 fSumW += w;
389 fSumW2 += w * w;
390 FillImpl<0, N>(t, w);
391 } else {
392 Fill(t);
393 }
394 }
395
396 /// %ROOT Streamer function to throw when trying to store an object of this class.
397 void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistStats"); }
398};
399
400} // namespace Experimental
401} // namespace ROOT
402
403#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Histogram statistics of unbinned values.
void Add(const RHistStats &other)
Add all entries from another statistics object.
void Fill(const std::tuple< A... > &args, RWeight weight)
Fill an entry into this statistics object with a weight.
void FillImpl(const std::tuple< A... > &args)
double ComputeSkewness(std::size_t dim=0) const
Compute the skewness of unbinned values.
void Fill(const A &...args)
Fill an entry into this statistics object.
std::vector< RDimensionStats > fDimensionStats
The sums per dimension.
void Clear()
Clear this statistics object.
void FillImpl(const std::tuple< A... > &args, double w)
double fSumW
The sum of weights.
std::size_t GetNDimensions() const
const RDimensionStats & GetDimensionStats(std::size_t dim=0) const
double fSumW2
The sum of weights squared.
void Fill(const std::tuple< A... > &args)
Fill an entry into this statistics object.
double ComputeNEffectiveEntries() const
Compute the number of effective entries.
double ComputeVariance(std::size_t dim=0) const
Compute the variance of unbinned values.
void Streamer(TBuffer &)
ROOT Streamer function to throw when trying to store an object of this class.
std::uint64_t GetNEntries() const
double ComputeMean(std::size_t dim=0) const
Compute the arithmetic mean of unbinned values.
double ComputeKurtosis(std::size_t dim=0) const
Compute the (excess) kurtosis of unbinned values.
RHistStats(std::size_t nDimensions)
Construct a statistics object.
std::uint64_t fNEntries
The number of entries.
double ComputeStdDev(std::size_t dim=0) const
Compute the standard deviation of unbinned values.
Buffer base class used for serializing objects.
Definition TBuffer.h:43
Double_t x[n]
Definition legend1.C:17
#define I(x, y, z)
void Add(const RDimensionStats &other)
A weight for filling histograms.
Definition RWeight.hxx:17