47 virtual ~CDFWrapper() {
if (fCDF)
delete fCDF; }
65 if (x <= fXmin)
return 0;
66 if (x >= fXmax)
return 1.0;
67 return (*fCDF)(
x)/fNorm;
71 return new CDFWrapper(*fCDF,fXmin,fXmax);
80 mutable IntegratorOneDim fIntegral;
84 virtual ~PDFIntegral() {
if (fPDF)
delete fPDF; }
93 fIntegral.SetFunction(*fPDF);
99 fNorm = fIntegral.Integral();
102 fNorm = fIntegral.IntegralLow(fXmax);
104 fNorm = fIntegral.IntegralUp(fXmin);
106 fNorm = fIntegral.Integral(fXmin, fXmax);
110 if (x <= fXmin)
return 0;
111 if (x >= fXmax)
return 1.0;
113 return fIntegral.IntegralLow(x)/fNorm;
115 return fIntegral.Integral(fXmin,x)/fNorm;
119 return new PDFIntegral(*fPDF, fXmin, fXmax);
125 MATH_ERROR_MSG(
"SetDistribution",
"Cannot set distribution type! Distribution type option must be ennabled.");
135 fSamples(std::vector<std::vector<
Double_t> >(2)),
136 fTestSampleFromH0(
kFALSE) {
137 Bool_t badSampleArg = sample1 == 0 || sample1Size == 0;
139 std::string msg =
"'sample1";
140 msg += !sample1Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
144 badSampleArg = sample2 == 0 || sample2Size == 0;
146 std::string msg =
"'sample2";
147 msg += !sample2Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
151 std::vector<const Double_t*> samples(2);
152 std::vector<UInt_t> samplesSizes(2);
153 samples[0] = sample1;
154 samples[1] = sample2;
155 samplesSizes[0] = sample1Size;
156 samplesSizes[1] = sample2Size;
164 fSamples(std::vector<std::vector<
Double_t> >(1)),
165 fTestSampleFromH0(
kTRUE) {
166 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
168 std::string msg =
"'sample";
169 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
173 std::vector<const Double_t*> samples(1, sample);
174 std::vector<UInt_t> samplesSizes(1, sampleSize);
183 fCombinedSamples.assign(std::accumulate(samplesSizes.begin(), samplesSizes.end(), 0), 0.0);
184 UInt_t combinedSamplesSize = 0;
185 for (
UInt_t i = 0; i < samples.size(); ++i) {
186 fSamples[i].assign(samples[i], samples[i] + samplesSizes[i]);
188 for (
UInt_t j = 0; j < samplesSizes[i]; ++j) {
191 combinedSamplesSize += samplesSizes[i];
196 if (degenerateSamples) {
197 std::string msg =
"Degenerate sample";
198 msg += samplesSizes.size() > 1 ?
"s!" :
"!";
199 msg +=
" Sampling values all identical.";
201 assert(!degenerateSamples);
262 fCDF = std::auto_ptr<IGenFunction>(
cdf);
267 MATH_WARN_MSG(
"SetDistributionFunction",
"Distribution type is changed to user defined");
272 fCDF = std::auto_ptr<IGenFunction>(
new PDFIntegral(f, xmin, xmax) );
274 fCDF = std::auto_ptr<IGenFunction>(
new CDFWrapper(f, xmin, xmax) );
279 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
281 std::string msg =
"'sample";
282 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
290 fSamples = std::vector<std::vector<Double_t> >(1);
292 SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
314 Double_t sigmaN = 0.0,
h = 0.0,
H = 0.0,
g = 0.0,
a, b,
c,
d, k = ns.size();
316 for (
UInt_t i = 0; i < ns.size(); ++i) {
323 std::vector<double> invI(N);
324 for (
UInt_t i = 1; i <= N - 1; ++i) {
328 for (
UInt_t i = 1; i <= N - 2; ++i) {
329 double tmp = invI[N-i];
330 for (
UInt_t j = i + 1; j <= N - 1; ++j) {
337 const double emc = 0.5772156649015328606065120900824024;
342 a = (4 *
g - 6) * k + (10 - 6 *
g) *
H - 4 *
g + 6;
343 b = (2 * g - 4) * k2 + 8 *
h * k + (2 * g - 14 *
h - 4) *
H - 8 *
h + 4 * g - 6;
344 c = (6 *
h + 2 * g - 2) * k2 + (4 *
h - 4 *g + 6) * k + (2 *
h - 6) *
H + 4 *
h;
345 d = (2 *
h + 6) * k2 - 4 *
h * k;
382 double ts[ ] = { -1.1954, -1.5806, -1.8172,
383 -2.0032, -2.2526, -2.4204, -2.5283, -4.2649, -1.1786, -1.5394,
384 -1.7728, -1.9426, -2.1685, -2.3288, -2.4374, -3.8906, -1.166,
385 -1.5193, -1.7462, -1.9067, -2.126, -2.2818, -2.3926, -3.719,
386 -1.1407, -1.4659, -1.671, -1.8105, -2.0048, -2.1356, -2.2348,
387 -3.2905, -1.1253, -1.4371, -1.6314, -1.7619, -1.9396, -2.0637,
388 -2.1521, -3.0902, -1.0777, -1.3503, -1.5102, -1.6177, -1.761,
389 -1.8537, -1.9178, -2.5758, -1.0489, -1.2984, -1.4415, -1.5355,
390 -1.6625, -1.738, -1.7936, -2.3263, -0.9978, -1.2098, -1.3251,
391 -1.4007, -1.4977, -1.5555, -1.5941, -1.96, -0.9417, -1.1187,
392 -1.209, -1.2671, -1.3382, -1.379, -1.405, -1.6449, -0.8981, -1.0491,
393 -1.1235, -1.1692, -1.2249, -1.2552, -1.2755, -1.4395, -0.8598,
394 -0.9904, -1.0513, -1.0879, -1.1317, -1.155, -1.1694, -1.2816,
395 -0.7258, -0.7938, -0.8188, -0.8312, -0.8435, -0.8471, -0.8496,
396 -0.8416, -0.5966, -0.617, -0.6177, -0.6139, -0.6073, -0.5987,
397 -0.5941, -0.5244, -0.4572, -0.4383, -0.419, -0.4033, -0.3834,
398 -0.3676, -0.3587, -0.2533, -0.2966, -0.2428, -0.2078, -0.1844,
399 -0.1548, -0.1346, -0.1224, 0, -0.1009, -0.0169, 0.0304, 0.0596,
400 0.0933, 0.1156, 0.1294, 0.2533, 0.1571, 0.2635, 0.3169, 0.348,
401 0.3823, 0.4038, 0.4166, 0.5244, 0.5357, 0.6496, 0.6992, 0.7246,
402 0.7528, 0.7683, 0.7771, 0.8416, 1.2255, 1.2989, 1.3202, 1.3254,
403 1.3305, 1.3286, 1.3257, 1.2816, 1.5262, 1.5677, 1.5709, 1.5663,
404 1.5561, 1.5449, 1.5356, 1.4395, 1.9633, 1.943, 1.919, 1.8975,
405 1.8641, 1.8389, 1.8212, 1.6449, 2.7314, 2.5899, 2.5, 2.4451,
406 2.3664, 2.3155, 2.2823, 1.96, 3.7825, 3.4425, 3.2582, 3.1423,
407 3.0036, 2.9101, 2.8579, 2.3263, 4.1241, 3.716, 3.4984, 3.3651,
408 3.2003, 3.0928, 3.0311, 2.4324, 4.6044, 4.0847, 3.8348, 3.6714,
409 3.4721, 3.3453, 3.2777, 2.5758, 5.409, 4.7223, 4.4022, 4.1791,
410 3.9357, 3.7809, 3.6963, 2.807, 6.4954, 5.5823, 5.1456, 4.8657,
411 4.5506, 4.3275, 4.2228, 3.0902, 6.8279, 5.8282, 5.3658, 5.0749,
412 4.7318, 4.4923, 4.3642, 3.1747, 7.2755, 6.197, 5.6715, 5.3642,
413 4.9991, 4.7135, 4.5945, 3.2905, 8.1885, 6.8537, 6.2077, 5.8499,
414 5.4246, 5.1137, 4.9555, 3.4808, 9.3061, 7.6592, 6.85, 6.4806,
415 5.9919, 5.6122, 5.5136, 3.719, 9.6132, 7.9234, 7.1025, 6.6731,
416 6.1549, 5.8217, 5.7345, 3.7911, 10.0989, 8.2395, 7.4326, 6.9567,
417 6.3908, 6.011, 5.9566, 3.8906, 10.8825, 8.8994, 7.8934, 7.4501,
418 6.9009, 6.4538, 6.2705, 4.0556, 11.8537, 9.5482, 8.5568, 8.0283,
419 7.4418, 6.9524, 6.6195, 4.2649 };
426 double p[] = { .00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,.1,.2,.3,.4,.5,.6,.7,.8,.9,
427 .925,.95,.975,.99,.9925,.995,.9975,.999,.99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999 };
430 const int nbins = 35;
437 MATH_ERROR_MSG(
"InterpolatePValues",
"Interpolation not implemented for nsamples not equal to 2");
440 std::vector<double> ts2(nbins);
441 std::vector<double> lp(nbins);
442 for (
int i = 0; i <
nbins; ++i)
444 ts2[i] = ts[offset+ i * ns];
446 lp[i] =
std::log( p[i]/(1.-p[i] ) );
450 int i1 =
std::distance(ts2.begin(), std::lower_bound(ts2.begin(), ts2.end(), tx ) ) - 1;
458 if (i2 >=
int(ts2.size()) ) {
464 assert(i1 < (
int) lp.size() && i2 < (int) lp.size() );
467 double tx1 = ts2[i1];
468 double tx2 = ts2[i2];
472 double lp0 = (lp1-lp2) * (tx - tx2)/ ( tx1-tx2) + lp2;
475 double p0 =
exp(lp0)/(1. +
exp(lp0) );
487 }
else if (A2 < 2.) {
488 pvalue =
std::pow(A2, -0.5) *
std::exp(-1.2337141 / A2) * (2.00012 + (0.247105 - (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * A2) * A2) * A2) * A2) * A2);
490 pvalue =
std::exp(-1. *
std::exp(1.0776 - (2.30695 - (0.43424 - (.082433 - (0.008056 - 0.0003146 * A2) * A2) * A2) * A2) * A2));
524 for (i = 0; i <
n; i++) {
538 for (i = 0; i <
n; i++) {
546 void adkTestStat(
double *adk,
const std::vector<std::vector<double> > & samples,
const std::vector<double> & zstar) {
552 int k = samples.size();
553 int l = zstar.size();
557 std::vector<int> fij (k*l);
561 std::vector<int> lvec(l);
579 std::vector<int> ns(k);
581 for (i = 0; i < k; i++) {
582 ns[i] = samples[i].size();
590 for (j = 0; j <
l; j++) {
592 for (i = 0; i < k; i++) {
593 fij[i + j*k] =
getCount(zstar[j], &samples[i][0], ns[i]);
594 lvec[j] += fij[i + j*k];
601 for (i = 0; i < k; i++) {
607 for (j = 0; j <
l; j++) {
609 maij = mij - (
double) fij[i + j*k] / 2.0;
610 bj =
getSum(&lvec[0], j + 1);
611 baj = bj - (
double) lvec[j] / 2.0;
614 tmp = (
double) nsum * mij - (
double) ns[i] * bj;
615 innerSum = innerSum + (
double) lvec[j] * tmp * tmp /
616 (bj * ((
double) nsum - bj));
619 tmp = (
double) nsum * maij - (
double) ns[i] * baj;
620 aInnerSum = aInnerSum + (
double) lvec[j] * tmp * tmp /
621 (baj * (nsum - baj) - nsum * (
double) lvec[j] / 4.0);
624 adk[0] = adk[0] + innerSum / ns[i];
625 adk[1] = adk[1] + aInnerSum / ns[i];
631 adk[0] = adk[0] / (
double) nsum;
632 adk[1] = (nsum - 1) * adk[1] / ((
double) nsum * (
double) nsum);
650 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
656 std::vector<Double_t>::iterator endUnique = std::unique(z.begin(), z.end());
657 z.erase(endUnique, z.end() );
658 std::vector<UInt_t>
h;
659 std::vector<Double_t>
H;
667 unsigned int nSamples =
fSamples.size();
670 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
675 std::cout <<
"time for H";
678 std::vector<std::vector<Double_t> >
F(nSamples);
679 for (
UInt_t i = 0; i < nSamples; ++i) {
680 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
682 F[i].push_back(std::count_if(
fSamples[i].begin(),
fSamples[i].end(), bind2nd(std::less<Double_t>(), *data)) + n / 2.);
685 std::cout <<
"time for F";
687 for (
UInt_t i = 0; i < nSamples; ++i) {
691 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
692 sum_result += h[j] *
TMath::Power(N * F[i][j]-
fSamples[i].size() * H[j], 2) / (H[j] * (N - H[j]) - N * h[j] / 4.0);
695 std::cout <<
"time for sum_resut";
697 std::cout <<
"sum_result " << sum_result << std::endl;
698 A2 += 1.0 /
fSamples[i].size() * sum_result;
702 std::cout <<
"A2 - old Bartolomeo code " << A2 << std::endl;
707 double adk[2] = {0,0};
729 std::vector<UInt_t> ns(
fSamples.size());
730 for (
unsigned int k = 0; k < ns.size(); ++k) ns[k] =
fSamples[k].size();
757 if (data1.
NDim() != 1 && data2.
NDim() != 1) {
758 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Bin Data set must be one-dimensional ");
761 unsigned int n1 = data1.
Size();
762 unsigned int n2 = data2.
Size();
768 std::vector<double> xdata(n1+n2);
769 for (
unsigned int i = 0; i < n1; ++i) {
771 const double * x = data1.
GetPoint(i, value);
775 for (
unsigned int i = 0; i < n2; ++i) {
777 const double * x = data2.
GetPoint(i, value);
781 double nall = ntot1+ntot2;
783 std::vector<unsigned int> index(n1+n2);
796 double x = xdata[ index[j] ];
801 unsigned int i = index[k];
804 value = data1.
Value(i);
811 value = data2.
Value(i);
818 }
while ( k < n1+n2 && xdata[ index[k] ] == x );
824 double tmp1 = ( nall * sum1 - ntot1 * sumAll );
825 double tmp2 = ( nall * sum2 - ntot2 * sumAll );
826 adsum += t * (tmp1*tmp1/ntot1 + tmp2*tmp2/ntot2) / ( sumAll * (nall - sumAll) ) ;
831 double A2 = adsum / nall;
834 std::vector<unsigned int> ns(2);
855 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
864 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
868 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
873 for (
Int_t i = 0; i <
n ; ++i) {
881 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Cannot compute p-value: data below or above the distribution's thresholds. Check sample consistency.");
891 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
898 MATH_ERROR_MSG(
"KolmogorovSmirnov2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
903 std::vector<Double_t>
a(na);
904 std::vector<Double_t> b(nb);
914 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
923 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
927 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
932 for (
UInt_t i = 0; i <
n; ++i) {
936 if (result > Dn) Dn =
result;
946 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
void SetDistribution(EDistribution dist)
Double_t PValueAD1Sample(Double_t A2) const
double dist(Rotation3D const &r1, Rotation3D const &r2)
void Print(Option_t *option="") const
Print the real and cpu time passed between the start and stop events.
int getCount(double z, const double *dat, int n)
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Double_t KolmogorovProb(Double_t z)
Calculates the Kolmogorov distribution function, Begin_Html.
std::vector< Double_t > fCombinedSamples
Double_t distance(const TPoint2 &p1, const TPoint2 &p2)
std::auto_ptr< IGenFunction > fCDF
double inner_product(const LAVector &, const LAVector &)
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
#define MATH_WARN_MSG(loc, str)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
unsigned int Size() const
return number of fit points
double cdf(double *x, double *p)
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
double pow(double, double)
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
#define MATH_ERROR_MSG(loc, str)
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
int getSum(const int *x, int n)
IBaseFunctionOneDim IGenFunction
Double_t GaussianCDF(Double_t x) const
Double_t KolmogorovTest(Int_t na, const Double_t *a, Int_t nb, const Double_t *b, Option_t *option)
Statistical test whether two one-dimensional sets of points are compatible with coming from the same ...
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
std::vector< std::vector< Double_t > > fSamples
static const double x1[5]
void Instantiate(const Double_t *sample, UInt_t sampleSize)
double Value(unsigned int ipoint) const
return the value for the given fit point
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
void SetSamples(std::vector< const Double_t * > samples, const std::vector< UInt_t > samplesSizes)
const double * GetPoint(unsigned int ipoint, double &value) const
retrieve at the same time a pointer to the coordinate data and the fit value More efficient than call...
Double_t Sqrt(Double_t x)
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
void adkTestStat(double *adk, const std::vector< std::vector< double > > &samples, const std::vector< double > &zstar)
unsigned int NDim() const
return coordinate data dimension
double exponential_cdf(double x, double lambda, double x0=0)
Cumulative distribution function of the exponential distribution (lower tail).
Double_t ExponentialCDF(Double_t x) const