Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
GoFTest.h
Go to the documentation of this file.
1// @(#)root/mathcore:$Id$
2// Authors: Bartolomeu Rabacal 05/2010
3/**********************************************************************
4 * *
5 * Copyright (c) 2006 , LCG ROOT MathLib Team *
6 * *
7 * *
8 **********************************************************************/
9// Header file for GoFTest
10
11#ifndef ROOT_Math_GoFTest
12#define ROOT_Math_GoFTest
13
15#include "TMath.h"
16
17#include <memory>
18#include <vector>
19
20/*
21*/
22
23namespace ROOT {
24
25 namespace Fit {
26 class BinData;
27 }
28namespace Math {
29
30///// @defgroup GoFClasses Goodness of Fit Statistical Tests Tools
31
32/*
33 Class for Goodness of Fit tests implementing the Anderson-Darling and Kolmogorov-Smirnov 1- and 2-Samples Goodness of Fit Tests.
34 @ingroup MathCore
35
36 */
37
38
39class GoFTest {
40public:
41
42 enum EDistribution { // H0 distributions for using only with 1-sample tests
43 kUndefined, // Default value for non templated 1-sample test. Set with SetDistribution
44 kUserDefined, // For internal use only within the class's template constructor
48 };
49
50 enum EUserDistribution { // User input distribution option
52 kPDF // Default value
53 };
54
55 enum ETestType { // Goodness of Fit test types for using with the class's unary functions as a shorthand for the in-built methods
56 kAD, // Anderson-Darling Test. Default value
57 kAD2s, // Anderson-Darling 2-Samples Test
58 kKS, // Kolmogorov-Smirnov Test
59 kKS2s // Kolmogorov-Smirnov 2-Samples Test
60 };
61
62 /* Constructor for using only with 2-samples tests */
63 GoFTest(UInt_t sample1Size, const Double_t* sample1, UInt_t sample2Size, const Double_t* sample2);
64
65 /* Constructor for using only with 1-sample tests with a specified distribution */
66 GoFTest(UInt_t sampleSize, const Double_t* sample, EDistribution dist = kUndefined, const std::vector<double> & distParams = {});
67
68 /* Templated constructor for using only with 1-sample tests with a user specified distribution */
69 template<class Dist>
70 GoFTest(UInt_t sampleSize, const Double_t* sample, Dist& dist, EUserDistribution userDist = kPDF,
71 Double_t xmin = 1, Double_t xmax = 0)
72 {
73 Instantiate(sample, sampleSize);
74 SetUserDistribution<Dist>(dist, userDist, xmin, xmax);
75 }
76
77 /* Specialization using IGenFunction interface */
78 GoFTest(UInt_t sampleSize, const Double_t* sample, const IGenFunction& dist, EUserDistribution userDist = kPDF,
79 Double_t xmin = 1, Double_t xmax = 0)
80 {
81 Instantiate(sample, sampleSize);
82 SetUserDistribution(dist, userDist, xmin, xmax);
83 }
84
85 /* Sets the user input distribution function for 1-sample tests. */
86 template<class Dist>
87 void SetUserDistribution(Dist& dist, EUserDistribution userDist = kPDF, Double_t xmin = 1, Double_t xmax = 0) {
88 WrappedFunction<Dist&> wdist(dist);
89 SetDistributionFunction(wdist, userDist, xmin, xmax);
90 }
91
92 /* Template specialization to set the user input distribution for 1-sample tests */
94 SetDistributionFunction(dist, userDist, xmin, xmax);
95 }
96
97 /* Sets the user input distribution as a probability density function for 1-sample tests */
98 template<class Dist>
99 void SetUserPDF(Dist& pdf, Double_t xmin = 1, Double_t xmax = 0) {
100 SetUserDistribution<Dist>(pdf, kPDF, xmin, xmax);
101 }
102
103 /* Template specialization to set the user input distribution as a probability density function for 1-sample tests */
104 void SetUserPDF(const IGenFunction& pdf, Double_t xmin = 1, Double_t xmax = 0) {
106 }
107
108 /* Sets the user input distribution as a cumulative distribution function for 1-sample tests
109 The CDF must return zero at xmin and one at xmax
110 */
111 template<class Dist>
112 void SetUserCDF(Dist& cdf, Double_t xmin = 1, Double_t xmax = 0) {
113 SetUserDistribution<Dist>(cdf, kCDF, xmin, xmax);
114 }
115
116 /* Template specialization to set the user input distribution as a cumulative distribution function for 1-sample tests */
117 void SetUserCDF(const IGenFunction& cdf, Double_t xmin = 1, Double_t xmax = 0) {
119 }
120
121
122 /* Sets the distribution for the predefined distribution types and optionally its parameters */
123 void SetDistribution(EDistribution dist, const std::vector<double> & distParams = {});
124
125
126 virtual ~GoFTest();
127
128/*
129 The Anderson-Darling K-Sample Test algorithm is described and taken from
130 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andeksam.htm
131 and described and taken from
132 (1) Scholz F.W., Stephens M.A. (1987), K-sample Anderson-Darling Tests, Journal of the American Statistical Association, 82, 918–924. (2-samples variant implemented)
133*/ void AndersonDarling2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
134 Double_t AndersonDarling2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2"
135
136/*
137 The Anderson-Darling 1-Sample Test algorithm for a specific distribution is described at
138 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andedarl.htm
139 and described and taken from (2)
140 Marsaglia J.C.W., Marsaglia G. (2004), Evaluating the Anderson-Darling Distribution, Journal of Statistical Software, Volume 09, Issue i02.
141 and described and taken from (3)
142 Lewis P.A.W. (1961), The Annals of Mathematical Statistics, Distribution of the Anderson-Darling Statistic, Volume 32, Number 4, 1118-1124.
143*/ void AndersonDarlingTest(Double_t& pvalue, Double_t& testStat) const;
144 Double_t AndersonDarlingTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2"
145
146/*
147 The Kolmogorov-Smirnov 2-Samples Test algorithm is described at
148 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ks2samp.htm
149 and described and taken from
150 http://root.cern.ch/root/html/TMath.html#TMath:KolmogorovTest
151*/ void KolmogorovSmirnov2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
152 Double_t KolmogorovSmirnov2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn"
153
154/*
155 The Kolmogorov-Smirnov 1-Sample Test algorithm for a specific distribution is described at
156 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/kstest.htm
157 and described and taken from (4)
158 Press W. H., Teukolsky S.A., Vetterling W.T., Flannery B.P. (2007), Numerical Recipes - The Art of Scientific Computing (Third Edition), Cambridge University Press
159*/ void KolmogorovSmirnovTest(Double_t& pvalue, Double_t& testStat) const;
160 Double_t KolmogorovSmirnovTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn"
161
162 // The class's unary functions
163 void operator()(ETestType test, Double_t& pvalue, Double_t& testStat) const;
164
165 // Returns default Anderson Darling 1-Sample Test and default p-value; option "t" returns the test statistic value
166 // specific to the test type
167 Double_t operator()(ETestType test = kAD, const Char_t* option = "p") const;
168
169 // Computation of the K-Sample Anderson-Darling Test's p-value as described in (1)
170 // given a normalized test statistic. The first variant described in the paper is used
171 static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2 );
172
173 // Compute The 2-Sample Anderson Darling test for binned data
174 static void AndersonDarling2SamplesTest(const ROOT::Fit::BinData & data1, const ROOT::Fit::BinData & data2, Double_t& pvalue, Double_t& testStat);
175
176private:
177
178 GoFTest(); // Disallowed default constructor
179 GoFTest(GoFTest& gof); // Disallowed copy constructor
180 GoFTest operator=(GoFTest& gof); // Disallowed assign operator
181
182 std::unique_ptr<IGenFunction> fCDF;
183
184
185 EDistribution fDist; /// Type of distribution
186 std::vector<Double_t> fParams; /// The distribution parameters (e.g. fParams[0] = mean, fParams[1] = sigma for a Gaussian)
187
188 std::vector<Double_t> fCombinedSamples;
189
190 std::vector<std::vector<Double_t> > fSamples;
191
193
194 void SetCDF();
196
197 void Instantiate(const Double_t* sample, UInt_t sampleSize);
198
199
203
204 static Double_t GetSigmaN(const std::vector<UInt_t> & ns, UInt_t N); // Computation of sigma_N as described in (1)
205
206 static Double_t InterpolatePValues(int nsamples,Double_t A2); // Linear interpolation used in GoFTest::PValueADKSamples
207
208
209 Double_t PValueAD1Sample(Double_t A2) const; // Computation of the 1-Sample Anderson-Darling Test's p-value
210
211 void LogSample(); // Applies the logarithm to the sample when the specified distribution to test is LogNormal
212
213 void SetSamples(std::vector<const Double_t*> samples, const std::vector<UInt_t> samplesSizes);
214
215 void SetParameters(const std::vector<double> & params); // Sets the distribution parameters
216}; // end GoFTest class
217
218
219} // Math namespace
220} // ROOT namespace
221#endif
char Char_t
Definition RtypesCore.h:37
unsigned int UInt_t
Definition RtypesCore.h:46
double Double_t
Definition RtypesCore.h:59
#define N
float xmin
float xmax
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
Definition BinData.h:52
void SetUserDistribution(Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition GoFTest.h:87
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
Definition GoFTest.cxx:352
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
Definition GoFTest.cxx:207
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
Definition GoFTest.cxx:266
GoFTest(GoFTest &gof)
std::unique_ptr< IGenFunction > fCDF
Definition GoFTest.h:182
Bool_t fTestSampleFromH0
Definition GoFTest.h:192
void SetUserPDF(const IGenFunction &pdf, Double_t xmin=1, Double_t xmax=0)
Definition GoFTest.h:104
void SetUserPDF(Dist &pdf, Double_t xmin=1, Double_t xmax=0)
Definition GoFTest.h:99
EDistribution fDist
Definition GoFTest.h:185
GoFTest operator=(GoFTest &gof)
void SetSamples(std::vector< const Double_t * > samples, const std::vector< UInt_t > samplesSizes)
Definition GoFTest.cxx:180
void SetUserCDF(const IGenFunction &cdf, Double_t xmin=1, Double_t xmax=0)
Definition GoFTest.h:117
std::vector< Double_t > fCombinedSamples
The distribution parameters (e.g. fParams[0] = mean, fParams[1] = sigma for a Gaussian)
Definition GoFTest.h:188
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
Definition GoFTest.cxx:920
std::vector< Double_t > fParams
Type of distribution.
Definition GoFTest.h:186
GoFTest(UInt_t sampleSize, const Double_t *sample, const IGenFunction &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition GoFTest.h:78
Double_t LogNormalCDF(Double_t x) const
void Instantiate(const Double_t *sample, UInt_t sampleSize)
Definition GoFTest.cxx:278
void SetDistribution(EDistribution dist, const std::vector< double > &distParams={})
Definition GoFTest.cxx:123
Double_t GaussianCDF(Double_t x) const
Definition GoFTest.cxx:294
GoFTest(UInt_t sampleSize, const Double_t *sample, Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition GoFTest.h:70
void SetUserDistribution(const IGenFunction &dist, GoFTest::EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition GoFTest.h:93
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition GoFTest.cxx:645
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
Definition GoFTest.cxx:310
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition GoFTest.cxx:895
std::vector< std::vector< Double_t > > fSamples
Definition GoFTest.h:190
Double_t PValueAD1Sample(Double_t A2) const
Definition GoFTest.cxx:482
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
Definition GoFTest.cxx:861
Double_t ExponentialCDF(Double_t x) const
Definition GoFTest.cxx:298
void SetParameters(const std::vector< double > &params)
Definition GoFTest.cxx:203
static Double_t InterpolatePValues(int nsamples, Double_t A2)
void SetUserCDF(Dist &cdf, Double_t xmin=1, Double_t xmax=0)
Definition GoFTest.h:112
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Definition IFunction.h:135
Template class to wrap any C++ callable object which takes one argument i.e.
Double_t x[n]
Definition legend1.C:17
TFitResultPtr Fit(FitObject *h1, TF1 *f1, Foption_t &option, const ROOT::Math::MinimizerOptions &moption, const char *goption, ROOT::Fit::DataRange &range)
Definition HFitImpl.cxx:133
Namespace for new Math classes and functions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition test.py:1