54 fXmin = -std::numeric_limits<double>::infinity();
55 fXmax = std::numeric_limits<double>::infinity();
66 if (
x >=
fXmax)
return 1.0;
95 fXmin = -std::numeric_limits<double>::infinity();
96 fXmax = std::numeric_limits<double>::infinity();
98 if (
fXmin == -std::numeric_limits<double>::infinity() &&
fXmax == std::numeric_limits<double>::infinity() ) {
101 else if (
fXmin == -std::numeric_limits<double>::infinity() )
103 else if (
fXmax == std::numeric_limits<double>::infinity() )
111 if (
x >=
fXmax)
return 1.0;
112 if (
fXmin == -std::numeric_limits<double>::infinity() )
125 MATH_ERROR_MSG(
"SetDistribution",
"Cannot set distribution type! Distribution type option must be enabled.");
135 fSamples(std::vector<std::vector<
Double_t> >(2)),
136 fTestSampleFromH0(
kFALSE) {
137 Bool_t badSampleArg = sample1 == 0 || sample1Size == 0;
139 std::string msg =
"'sample1";
140 msg += !sample1Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
142 assert(!badSampleArg);
144 badSampleArg = sample2 == 0 || sample2Size == 0;
146 std::string msg =
"'sample2";
147 msg += !sample2Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
149 assert(!badSampleArg);
151 std::vector<const Double_t*> samples(2);
152 std::vector<UInt_t> samplesSizes(2);
153 samples[0] = sample1;
154 samples[1] = sample2;
155 samplesSizes[0] = sample1Size;
156 samplesSizes[1] = sample2Size;
162 fSamples(std::vector<std::vector<
Double_t> >(1)),
163 fTestSampleFromH0(
kTRUE) {
164 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
166 std::string msg =
"'sample";
167 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
169 assert(!badSampleArg);
171 std::vector<const Double_t*> samples(1, sample);
172 std::vector<UInt_t> samplesSizes(1, sampleSize);
181 fCombinedSamples.assign(std::accumulate(samplesSizes.begin(), samplesSizes.end(), 0u), 0.0);
182 UInt_t combinedSamplesSize = 0;
183 for (
UInt_t i = 0; i < samples.size(); ++i) {
184 fSamples[i].assign(samples[i], samples[i] + samplesSizes[i]);
186 for (
UInt_t j = 0; j < samplesSizes[i]; ++j) {
189 combinedSamplesSize += samplesSizes[i];
194 if (degenerateSamples) {
195 std::string msg =
"Degenerate sample";
196 msg += samplesSizes.size() > 1 ?
"s!" :
"!";
197 msg +=
" Sampling values all identical.";
199 assert(!degenerateSamples);
268 MATH_WARN_MSG(
"SetDistributionFunction",
"Distribution type is changed to user defined");
280 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
282 std::string msg =
"'sample";
283 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
285 assert(!badSampleArg);
289 fSamples = std::vector<std::vector<Double_t> >(1);
291 SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
313 Double_t sigmaN = 0.0,
h = 0.0,
H = 0.0,
g = 0.0,
a,
b,
c,
d, k = ns.size();
315 for (
UInt_t i = 0; i < ns.size(); ++i) {
322 std::vector<double> invI(
N);
323 for (
UInt_t i = 1; i <=
N - 1; ++i) {
327 for (
UInt_t i = 1; i <=
N - 2; ++i) {
328 double tmp = invI[
N-i];
329 for (
UInt_t j = i + 1; j <=
N - 1; ++j) {
336 const double emc = 0.5772156649015328606065120900824024;
337 h = std::log(
double(
N-1) ) + emc;
340 double k2 = std::pow(k,2);
341 a = (4 *
g - 6) * k + (10 - 6 *
g) *
H - 4 *
g + 6;
342 b = (2 *
g - 4) * k2 + 8 *
h * k + (2 *
g - 14 *
h - 4) *
H - 8 *
h + 4 *
g - 6;
343 c = (6 *
h + 2 *
g - 2) * k2 + (4 *
h - 4 *
g + 6) * k + (2 *
h - 6) *
H + 4 *
h;
344 d = (2 *
h + 6) * k2 - 4 *
h * k;
345 sigmaN +=
a * std::pow(
double(
N),3) +
b * std::pow(
double(
N),2) +
c *
N +
d;
381 double ts[ ] = { -1.1954, -1.5806, -1.8172,
382 -2.0032, -2.2526, -2.4204, -2.5283, -4.2649, -1.1786, -1.5394,
383 -1.7728, -1.9426, -2.1685, -2.3288, -2.4374, -3.8906, -1.166,
384 -1.5193, -1.7462, -1.9067, -2.126, -2.2818, -2.3926, -3.719,
385 -1.1407, -1.4659, -1.671, -1.8105, -2.0048, -2.1356, -2.2348,
386 -3.2905, -1.1253, -1.4371, -1.6314, -1.7619, -1.9396, -2.0637,
387 -2.1521, -3.0902, -1.0777, -1.3503, -1.5102, -1.6177, -1.761,
388 -1.8537, -1.9178, -2.5758, -1.0489, -1.2984, -1.4415, -1.5355,
389 -1.6625, -1.738, -1.7936, -2.3263, -0.9978, -1.2098, -1.3251,
390 -1.4007, -1.4977, -1.5555, -1.5941, -1.96, -0.9417, -1.1187,
391 -1.209, -1.2671, -1.3382, -1.379, -1.405, -1.6449, -0.8981, -1.0491,
392 -1.1235, -1.1692, -1.2249, -1.2552, -1.2755, -1.4395, -0.8598,
393 -0.9904, -1.0513, -1.0879, -1.1317, -1.155, -1.1694, -1.2816,
394 -0.7258, -0.7938, -0.8188, -0.8312, -0.8435, -0.8471, -0.8496,
395 -0.8416, -0.5966, -0.617, -0.6177, -0.6139, -0.6073, -0.5987,
396 -0.5941, -0.5244, -0.4572, -0.4383, -0.419, -0.4033, -0.3834,
397 -0.3676, -0.3587, -0.2533, -0.2966, -0.2428, -0.2078, -0.1844,
398 -0.1548, -0.1346, -0.1224, 0, -0.1009, -0.0169, 0.0304, 0.0596,
399 0.0933, 0.1156, 0.1294, 0.2533, 0.1571, 0.2635, 0.3169, 0.348,
400 0.3823, 0.4038, 0.4166, 0.5244, 0.5357, 0.6496, 0.6992, 0.7246,
401 0.7528, 0.7683, 0.7771, 0.8416, 1.2255, 1.2989, 1.3202, 1.3254,
402 1.3305, 1.3286, 1.3257, 1.2816, 1.5262, 1.5677, 1.5709, 1.5663,
403 1.5561, 1.5449, 1.5356, 1.4395, 1.9633, 1.943, 1.919, 1.8975,
404 1.8641, 1.8389, 1.8212, 1.6449, 2.7314, 2.5899, 2.5, 2.4451,
405 2.3664, 2.3155, 2.2823, 1.96, 3.7825, 3.4425, 3.2582, 3.1423,
406 3.0036, 2.9101, 2.8579, 2.3263, 4.1241, 3.716, 3.4984, 3.3651,
407 3.2003, 3.0928, 3.0311, 2.4324, 4.6044, 4.0847, 3.8348, 3.6714,
408 3.4721, 3.3453, 3.2777, 2.5758, 5.409, 4.7223, 4.4022, 4.1791,
409 3.9357, 3.7809, 3.6963, 2.807, 6.4954, 5.5823, 5.1456, 4.8657,
410 4.5506, 4.3275, 4.2228, 3.0902, 6.8279, 5.8282, 5.3658, 5.0749,
411 4.7318, 4.4923, 4.3642, 3.1747, 7.2755, 6.197, 5.6715, 5.3642,
412 4.9991, 4.7135, 4.5945, 3.2905, 8.1885, 6.8537, 6.2077, 5.8499,
413 5.4246, 5.1137, 4.9555, 3.4808, 9.3061, 7.6592, 6.85, 6.4806,
414 5.9919, 5.6122, 5.5136, 3.719, 9.6132, 7.9234, 7.1025, 6.6731,
415 6.1549, 5.8217, 5.7345, 3.7911, 10.0989, 8.2395, 7.4326, 6.9567,
416 6.3908, 6.011, 5.9566, 3.8906, 10.8825, 8.8994, 7.8934, 7.4501,
417 6.9009, 6.4538, 6.2705, 4.0556, 11.8537, 9.5482, 8.5568, 8.0283,
418 7.4418, 6.9524, 6.6195, 4.2649 };
425 double p[] = { .00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,.1,.2,.3,.4,.5,.6,.7,.8,.9,
426 .925,.95,.975,.99,.9925,.995,.9975,.999,.99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999 };
429 const int nbins = 35;
436 MATH_ERROR_MSG(
"InterpolatePValues",
"Interpolation not implemented for nsamples not equal to 2");
439 std::vector<double> ts2(nbins);
440 std::vector<double> lp(nbins);
441 for (
int i = 0; i < nbins; ++i)
443 ts2[i] = ts[offset+ i * ns];
445 lp[i] = std::log( p[i]/(1.-p[i] ) );
449 int i1 = std::distance(ts2.begin(), std::lower_bound(ts2.begin(), ts2.end(), tx ) ) - 1;
457 if (i2 >=
int(ts2.size()) ) {
463 assert(i1 < (
int) lp.size() && i2 < (
int) lp.size() );
466 double tx1 = ts2[i1];
467 double tx2 = ts2[i2];
471 double lp0 = (lp1-lp2) * (tx - tx2)/ ( tx1-tx2) + lp2;
474 double p0 = exp(lp0)/(1. + exp(lp0) );
486 }
else if (A2 < 2.) {
487 pvalue = std::pow(A2, -0.5) * std::exp(-1.2337141 / A2) * (2.00012 + (0.247105 - (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * A2) * A2) * A2) * A2) * A2);
489 pvalue = std::exp(-1. * std::exp(1.0776 - (2.30695 - (0.43424 - (.082433 - (0.008056 - 0.0003146 * A2) * A2) * A2) * A2) * A2));
523 for (i = 0; i <
n; i++) {
537 for (i = 0; i <
n; i++) {
545void adkTestStat(
double *adk,
const std::vector<std::vector<double> > & samples,
const std::vector<double> & zstar) {
551 int k = samples.size();
552 int l = zstar.size();
556 std::vector<int> fij (k*
l);
560 std::vector<int> lvec(
l);
578 std::vector<int> ns(k);
580 for (i = 0; i < k; i++) {
581 ns[i] = samples[i].size();
589 for (j = 0; j <
l; j++) {
591 for (i = 0; i < k; i++) {
592 fij[i + j*k] =
getCount(zstar[j], &samples[i][0], ns[i]);
593 lvec[j] += fij[i + j*k];
600 for (i = 0; i < k; i++) {
606 for (j = 0; j <
l; j++) {
608 maij = mij - (
double) fij[i + j*k] / 2.0;
609 bj =
getSum(&lvec[0], j + 1);
610 baj = bj - (
double) lvec[j] / 2.0;
613 tmp = (
double) nsum * mij - (
double) ns[i] * bj;
614 innerSum = innerSum + (
double) lvec[j] * tmp * tmp /
615 (bj * ((
double) nsum - bj));
618 tmp = (
double) nsum * maij - (
double) ns[i] * baj;
619 aInnerSum = aInnerSum + (
double) lvec[j] * tmp * tmp /
620 (baj * (nsum - baj) - nsum * (
double) lvec[j] / 4.0);
623 adk[0] = adk[0] + innerSum / ns[i];
624 adk[1] = adk[1] + aInnerSum / ns[i];
630 adk[0] = adk[0] / (
double) nsum;
631 adk[1] = (nsum - 1) * adk[1] / ((
double) nsum * (
double) nsum);
649 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
655 std::vector<Double_t>::iterator endUnique = std::unique(z.begin(), z.end());
656 z.erase(endUnique, z.end() );
657 std::vector<UInt_t>
h;
658 std::vector<Double_t>
H;
666 unsigned int nSamples =
fSamples.size();
669 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
673 std::bind(std::less<Double_t>(), std::placeholders::_1, *data)) +
n / 2.);
675 std::cout <<
"time for H";
678 std::vector<std::vector<Double_t> >
F(nSamples);
679 for (
UInt_t i = 0; i < nSamples; ++i) {
680 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
683 std::bind(std::less<Double_t>(), std::placeholders::_1, *data)) +
n / 2.);
686 std::cout <<
"time for F";
688 for (
UInt_t i = 0; i < nSamples; ++i) {
692 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
696 std::cout <<
"time for sum_resut";
698 std::cout <<
"sum_result " << sum_result << std::endl;
699 A2 += 1.0 /
fSamples[i].size() * sum_result;
703 std::cout <<
"A2 - old Bartolomeo code " << A2 << std::endl;
708 double adk[2] = {0,0};
730 std::vector<UInt_t> ns(
fSamples.size());
731 for (
unsigned int k = 0; k < ns.size(); ++k) ns[k] =
fSamples[k].
size();
758 if (data1.
NDim() != 1 && data2.
NDim() != 1) {
759 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Bin Data set must be one-dimensional ");
762 unsigned int n1 = data1.
Size();
763 unsigned int n2 = data2.
Size();
769 std::vector<double> xdata(n1+n2);
770 for (
unsigned int i = 0; i < n1; ++i) {
772 const double *
x = data1.
GetPoint(i, value);
776 for (
unsigned int i = 0; i < n2; ++i) {
778 const double *
x = data2.
GetPoint(i, value);
782 double nall = ntot1+ntot2;
784 std::vector<unsigned int> index(n1+n2);
797 double x = xdata[ index[j] ];
802 unsigned int i = index[k];
805 value = data1.
Value(i);
812 value = data2.
Value(i);
819 }
while ( k < n1+n2 && xdata[ index[k] ] ==
x );
825 double tmp1 = ( nall * sum1 - ntot1 * sumAll );
826 double tmp2 = ( nall * sum2 - ntot2 * sumAll );
827 adsum += t * (tmp1*tmp1/ntot1 + tmp2*tmp2/ntot2) / ( sumAll * (nall - sumAll) ) ;
832 double A2 = adsum / nall;
835 std::vector<unsigned int> ns(2);
856 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
865 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
869 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
874 for (
Int_t i = 0; i <
n ; ++i) {
882 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Cannot compute p-value: data below or above the distribution's thresholds. Check sample consistency.");
892 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
899 MATH_ERROR_MSG(
"KolmogorovSmirnov2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
904 std::vector<Double_t>
a(na);
905 std::vector<Double_t>
b(nb);
915 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
924 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
928 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
933 for (
UInt_t i = 0; i <
n; ++i) {
937 if (result > Dn) Dn = result;
947 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
#define MATH_ERROR_MSG(loc, str)
#define MATH_WARN_MSG(loc, str)
static const double x1[5]
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
const double * GetPoint(unsigned int ipoint, double &value) const
retrieve at the same time a pointer to the coordinate data and the fit value More efficient than call...
double Value(unsigned int ipoint) const
return the value for the given fit point
unsigned int Size() const
return number of fit points
unsigned int NDim() const
return coordinate data dimension
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
std::unique_ptr< IGenFunction > fCDF
void SetSamples(std::vector< const Double_t * > samples, const std::vector< UInt_t > samplesSizes)
std::vector< Double_t > fCombinedSamples
The distribution parameters (e.g. fParams[0] = mean, fParams[1] = sigma for a Gaussian)
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
std::vector< Double_t > fParams
Type of distribution.
void Instantiate(const Double_t *sample, UInt_t sampleSize)
void SetDistribution(EDistribution dist, const std::vector< double > &distParams={})
Double_t GaussianCDF(Double_t x) const
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
std::vector< std::vector< Double_t > > fSamples
Double_t PValueAD1Sample(Double_t A2) const
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
Double_t ExponentialCDF(Double_t x) const
void SetParameters(const std::vector< double > ¶ms)
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
User Class for performing numerical integration of a function in one dimension.
double IntegralUp(const IGenFunction &f, double a)
evaluate the Integral of a function f over the semi-infinite interval (a,+inf)
void SetFunction(Function &f)
method to set the a generic integration function
double Integral(Function &f, double a, double b)
evaluate the Integral of a function f over the defined interval (a,b)
double IntegralLow(const IGenFunction &f, double b)
evaluate the Integral of a function f over the over the semi-infinite interval (-inf,...
PDFIntegral(const IGenFunction &pdf, Double_t xmin=0, Double_t xmax=-1)
Double_t DoEval(Double_t x) const
implementation of the evaluation function. Must be implemented by derived classes
IGenFunction * Clone() const
Clone a function.
const IGenFunction * fPDF
IntegratorOneDim fIntegral
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
void Print(Option_t *option="") const
Print the real and cpu time passed between the start and stop events.
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
double exponential_cdf(double x, double lambda, double x0=0)
Cumulative distribution function of the exponential distribution (lower tail).
Namespace for new Math classes and functions.
void adkTestStat(double *adk, const std::vector< std::vector< double > > &samples, const std::vector< double > &zstar)
int getCount(double z, const double *dat, int n)
int getSum(const int *x, int n)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Double_t KolmogorovTest(Int_t na, const Double_t *a, Int_t nb, const Double_t *b, Option_t *option)
Statistical test whether two one-dimensional sets of points are compatible with coming from the same ...
Double_t Sqrt(Double_t x)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Double_t KolmogorovProb(Double_t z)
Calculates the Kolmogorov distribution function,.
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
IGenFunction * Clone() const
Clone a function.
Double_t DoEval(Double_t x) const
implementation of the evaluation function. Must be implemented by derived classes
CDFWrapper(const IGenFunction &cdf, Double_t xmin=0, Double_t xmax=-1)
const IGenFunction * fCDF
static uint64_t sum(uint64_t i)