Logo ROOT  
Reference Guide
GoFTest.h
Go to the documentation of this file.
1// @(#)root/mathcore:$Id$
2// Authors: Bartolomeu Rabacal 05/2010
3/**********************************************************************
4 * *
5 * Copyright (c) 2006 , LCG ROOT MathLib Team *
6 * *
7 * *
8 **********************************************************************/
9// Header file for GoFTest
10
11#ifndef ROOT_Math_GoFTest
12#define ROOT_Math_GoFTest
13
15#include "TMath.h"
16
17#include <memory>
18#include <vector>
19
20/*
21*/
22
23namespace ROOT {
24
25 namespace Fit {
26 class BinData;
27 }
28namespace Math {
29
30///// @defgroup GoFClasses Goodness of Fit Statistical Tests Tools
31
32/*
33 Class for Goodness of Fit tests implementing the Anderson-Darling and Kolmogorov-Smirnov 1- and 2-Samples Goodness of Fit Tests.
34 @ingroup MathCore
35
36 */
37
38
39class GoFTest {
40public:
41
42 enum EDistribution { // H0 distributions for using only with 1-sample tests
43 kUndefined, // Default value for non templated 1-sample test. Set with SetDistribution
44 kUserDefined, // For internal use only within the class's template constructor
48 };
49
50 enum EUserDistribution { // User input distribution option
52 kPDF // Default value
53 };
54
55 enum ETestType { // Goodness of Fit test types, used with the class's function-call operators as a shorthand for the in-built test methods
56 kAD, // Anderson-Darling Test. Default value
57 kAD2s, // Anderson-Darling 2-Samples Test
58 kKS, // Kolmogorov-Smirnov Test
59 kKS2s // Kolmogorov-Smirnov 2-Samples Test
60 };
61
62 /* Constructor for using only with 2-samples tests */
63 GoFTest(UInt_t sample1Size, const Double_t* sample1, UInt_t sample2Size, const Double_t* sample2);
64
65 /* Constructor for using only with 1-sample tests with a specified distribution */
66 GoFTest(UInt_t sampleSize, const Double_t* sample, EDistribution dist = kUndefined);
67
68 /* Templated constructor, for use only with 1-sample tests, taking a user-specified distribution.
      sampleSize, sample : number of entries and pointer to the sample data; forwarded to Instantiate.
      dist               : user distribution object; forwarded to SetUserDistribution<Dist>.
      userDist           : how dist is to be interpreted; defaults to kPDF.
      xmin, xmax         : forwarded unchanged to SetUserDistribution<Dist>. The defaults (1, 0) give
                           xmin > xmax, presumably meaning "no explicit range" - TODO confirm in GoFTest.cxx. */
69 template<class Dist>
70 GoFTest(UInt_t sampleSize, const Double_t* sample, Dist& dist, EUserDistribution userDist = kPDF,
71 Double_t xmin = 1, Double_t xmax = 0)
72 {
73 Instantiate(sample, sampleSize);
74 SetUserDistribution<Dist>(dist, userDist, xmin, xmax);
75 }
76
77 /* Non-template constructor overload (not a template specialization) that takes the user
      distribution through the IGenFunction interface. It calls Instantiate on the sample and then
      the IGenFunction overload of SetUserDistribution; userDist defaults to kPDF. */
78 GoFTest(UInt_t sampleSize, const Double_t* sample, const IGenFunction& dist, EUserDistribution userDist = kPDF,
79 Double_t xmin = 1, Double_t xmax = 0)
80 {
81 Instantiate(sample, sampleSize);
82 SetUserDistribution(dist, userDist, xmin, xmax);
83 }
84
85 /* Sets the user input distribution function for 1-sample tests. */
86 template<class Dist>
89 SetDistributionFunction(wdist, userDist, xmin, xmax);
90 }
91
92 /* Template specialization to set the user input distribution for 1-sample tests */
95 }
96
97 /* Sets the user input distribution as a probability density function for 1-sample tests.
      Forwards pdf, xmin and xmax to SetUserDistribution<Dist> with the distribution kind fixed to kPDF. */
98 template<class Dist>
99 void SetUserPDF(Dist& pdf, Double_t xmin = 1, Double_t xmax = 0) {
100 SetUserDistribution<Dist>(pdf, kPDF, xmin, xmax);
101 }
102
103 /* Template specialization to set the user input distribution as a probability density function for 1-sample tests */
104 void SetUserPDF(const IGenFunction& pdf, Double_t xmin = 1, Double_t xmax = 0) {
106 }
107
108 /* Sets the user input distribution as a cumulative distribution function for 1-sample tests.
109 The CDF must return zero
       NOTE(review): the sentence above looks truncated - presumably "zero at xmin and one at xmax";
       confirm against the original header before relying on it.
       Forwards cdf, xmin and xmax to SetUserDistribution<Dist> with the distribution kind fixed to kCDF.
110 */
111 template<class Dist>
112 void SetUserCDF(Dist& cdf, Double_t xmin = 1, Double_t xmax = 0) {
113 SetUserDistribution<Dist>(cdf, kCDF, xmin, xmax);
114 }
115
116 /* Template specialization to set the user input distribution as a cumulative distribution function for 1-sample tests */
117 void SetUserCDF(const IGenFunction& cdf, Double_t xmin = 1, Double_t xmax = 0) {
119 }
120
121
122 /* Sets the distribution for the predefined distribution types */
124
125
126 virtual ~GoFTest();
127
128/*
129 The Anderson-Darling K-Sample Test algorithm is described and taken from
130 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andeksam.htm
131 and described and taken from
132 (1) Scholz F.W., Stephens M.A. (1987), K-sample Anderson-Darling Tests, Journal of the American Statistical Association, 82, 918–924. (2-samples variant implemented)
133*/ void AndersonDarling2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
134 Double_t AndersonDarling2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2"
135
136/*
137 The Anderson-Darling 1-Sample Test algorithm for a specific distribution is described at
138 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andedarl.htm
139 and described and taken from (2)
140 Marsaglia J.C.W., Marsaglia G. (2004), Evaluating the Anderson-Darling Distribution, Journal of Statistical Software, Volume 09, Issue i02.
141 and described and taken from (3)
142 Lewis P.A.W. (1961), The Annals of Mathematical Statistics, Distribution of the Anderson-Darling Statistic, Volume 32, Number 4, 1118-1124.
143*/ void AndersonDarlingTest(Double_t& pvalue, Double_t& testStat) const;
144 Double_t AndersonDarlingTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2"
145
146/*
147 The Kolmogorov-Smirnov 2-Samples Test algorithm is described at
148 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ks2samp.htm
149 and described and taken from
150 http://root.cern.ch/root/html/TMath.html#TMath:KolmogorovTest
151*/ void KolmogorovSmirnov2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
152 Double_t KolmogorovSmirnov2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn"
153
154/*
155 The Kolmogorov-Smirnov 1-Sample Test algorithm for a specific distribution is described at
156 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/kstest.htm
157 and described and taken from (4)
158 Press W. H., Teukolsky S.A., Vetterling W.T., Flannery B.P. (2007), Numerical Recipes - The Art of Scientific Computing (Third Edition), Cambridge University Press
159*/ void KolmogorovSmirnovTest(Double_t& pvalue, Double_t& testStat) const;
160 Double_t KolmogorovSmirnovTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn"
161
162 // The class's function-call operators: a shorthand for running the test selected by `test`.
    // pvalue and testStat are output references (presumably filled with the selected test's p-value
    // and test statistic, like the named *Test methods - TODO confirm in GoFTest.cxx).
163 void operator()(ETestType test, Double_t& pvalue, Double_t& testStat) const;
164
165 // With the default arguments, selects the Anderson-Darling 1-Sample Test (kAD) and returns its p-value (option "p");
166 // option "t" returns the test statistic value specific to the test type
167 Double_t operator()(ETestType test = kAD, const Char_t* option = "p") const;
168
169 // Computation of the K-Sample Anderson-Darling Test's p-value as described in (1)
170 // given a normalized test statistic. The first variant described in the paper is used
171 static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2 );
172
173 // Compute The 2-Sample Anderson Darling test for binned data
174 static void AndersonDarling2SamplesTest(const ROOT::Fit::BinData & data1, const ROOT::Fit::BinData & data2, Double_t& pvalue, Double_t& testStat);
175
176private:
177
178 GoFTest(); // Disallowed default constructor
179 GoFTest(GoFTest& gof); // Disallowed copy constructor
180 GoFTest operator=(GoFTest& gof); // Disallowed assign operator
181
182 std::unique_ptr<IGenFunction> fCDF;
183
184
186
189
190 std::vector<Double_t> fCombinedSamples;
191
192 std::vector<std::vector<Double_t> > fSamples;
193
195
196 void SetCDF();
198
199 void Instantiate(const Double_t* sample, UInt_t sampleSize);
200
201
205
206 static Double_t GetSigmaN(const std::vector<UInt_t> & ns, UInt_t N); // Computation of sigma_N as described in (1)
207
208 static Double_t InterpolatePValues(int nsamples,Double_t A2); // Linear interpolation used in GoFTest::PValueADKSamples
209
210
211 Double_t PValueAD1Sample(Double_t A2) const; // Computation of the 1-Sample Anderson-Darling Test's p-value
212
213 void LogSample(); // Applies the logarithm to the sample when the specified distribution to test is LogNormal
214
215 void SetSamples(std::vector<const Double_t*> samples, const std::vector<UInt_t> samplesSizes);
216
217 void SetParameters(); // Sets the estimated mean and standard-deviation from the samples
218}; // end GoFTest class
219
220
221} // Math namespace
222} // ROOT namespace
223#endif
char Char_t
Definition: RtypesCore.h:33
unsigned int UInt_t
Definition: RtypesCore.h:46
bool Bool_t
Definition: RtypesCore.h:63
double Double_t
Definition: RtypesCore.h:59
#define N
float xmin
Definition: THbookFile.cxx:95
float xmax
Definition: THbookFile.cxx:95
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
Definition: BinData.h:52
void SetUserDistribution(Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:87
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
Definition: GoFTest.cxx:353
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:208
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
Definition: GoFTest.cxx:264
GoFTest(GoFTest &gof)
std::unique_ptr< IGenFunction > fCDF
Definition: GoFTest.h:182
Bool_t fTestSampleFromH0
Definition: GoFTest.h:194
void SetUserPDF(const IGenFunction &pdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:104
void SetUserPDF(Dist &pdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:99
EDistribution fDist
Definition: GoFTest.h:185
GoFTest operator=(GoFTest &gof)
void SetSamples(std::vector< const Double_t * > samples, const std::vector< UInt_t > samplesSizes)
Definition: GoFTest.cxx:180
void SetUserCDF(const IGenFunction &cdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:117
Double_t fSigma
Definition: GoFTest.h:188
std::vector< Double_t > fCombinedSamples
Definition: GoFTest.h:190
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:921
GoFTest(UInt_t sampleSize, const Double_t *sample, const IGenFunction &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:78
void SetDistribution(EDistribution dist)
Definition: GoFTest.cxx:123
Double_t LogNormalCDF(Double_t x) const
void Instantiate(const Double_t *sample, UInt_t sampleSize)
Definition: GoFTest.cxx:276
virtual ~GoFTest()
Definition: GoFTest.cxx:178
Double_t GaussianCDF(Double_t x) const
Definition: GoFTest.cxx:294
GoFTest(UInt_t sampleSize, const Double_t *sample, Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:70
void SetUserDistribution(const IGenFunction &dist, GoFTest::EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:93
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:646
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
Definition: GoFTest.cxx:311
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:896
std::vector< std::vector< Double_t > > fSamples
Definition: GoFTest.h:192
Double_t PValueAD1Sample(Double_t A2) const
Definition: GoFTest.cxx:483
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:862
Double_t ExponentialCDF(Double_t x) const
Definition: GoFTest.cxx:298
static Double_t InterpolatePValues(int nsamples, Double_t A2)
Double_t fMean
Definition: GoFTest.h:187
void SetUserCDF(Dist &cdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:112
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Definition: IFunction.h:135
Template class to wrap any C++ callable object which takes one argument i.e.
Double_t x[n]
Definition: legend1.C:17
TFitResultPtr Fit(FitObject *h1, TF1 *f1, Foption_t &option, const ROOT::Math::MinimizerOptions &moption, const char *goption, ROOT::Fit::DataRange &range)
Definition: HFitImpl.cxx:133
Namespace for new Math classes and functions.
double Dist(void *xp, void *yp)
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static constexpr double ns
Definition: test.py:1