Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TKDE.cxx
Go to the documentation of this file.
1// // @(#)root/hist:$Id$
2// Authors: Bartolomeu Rabacal 07/2010
3/**********************************************************************
4 * *
5 * Copyright (c) 2006 , ROOT MathLib Team *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 * *
10 **********************************************************************/
11
12/** \class TKDE
13 \ingroup Hist
14 Kernel Density Estimation class.
15 The three main references are:
16 1. "Scott DW, Multivariate Density Estimation. Theory, Practice and Visualization. New York: Wiley",
17 2. "Jann Ben - ETH Zurich, Switzerland -, Univariate kernel density estimation document for KDENS:
18 Stata module for univariate kernel density estimation."
19 3. "Hardle W, Muller M, Sperlich S, Werwatz A, Nonparametric and Semiparametric Models. Springer."
20 4. "Cranmer KS, Kernel Estimation in High-Energy
21 Physics. Computer Physics Communications 136:198-207,2001" - e-Print Archive: hep ex/0011057.
22
23 The algorithm is briefly described in (4). A binned version is also implemented to address the
24 performance issue due to its data size dependance.
25 */
26
27
28#include <functional>
29#include <algorithm>
30#include <memory>
31#include <numeric>
32#include <limits>
33#include <cassert>
34
35#include "Math/Error.h"
36#include "TMath.h"
37#include "Math/Functor.h"
38#include "Math/Integrator.h"
41#include "TGraphErrors.h"
42#include "TF1.h"
43#include "TH1.h"
44#include "TVirtualPad.h"
45#include "TKDE.h"
46
48
49
52 KernelIntegrand(const TKDE* kde, EIntegralResult intRes);
54private:
55 const TKDE* fKDE;
57};
58
59///////////////////////////////////////////////////////////////////////
60/// default constructor used by I/O
62 fKernelFunction(nullptr),
63 fKernel(nullptr),
64 fPDF(nullptr),
65 fUpperPDF(nullptr),
66 fLowerPDF(nullptr),
67 fApproximateBias(nullptr),
68 fGraph(nullptr),
69 fUseMirroring(false), fMirrorLeft(false), fMirrorRight(false), fAsymLeft(false), fAsymRight(false),
70 fUseBins(false), fNewData(false), fUseMinMaxFromData(false),
71 fNBins(0), fNEvents(0), fSumOfCounts(0), fUseBinsNEvents(0),
72 fMean(0.),fSigma(0.), fSigmaRob(0.), fXMin(0.), fXMax(0.),
73 fRho(0.), fAdaptiveBandwidthFactor(0.), fWeightSize(0)
74{
75}
76
78 //Class destructor
79 if (fPDF) delete fPDF;
80 if (fUpperPDF) delete fUpperPDF;
81 if (fLowerPDF) delete fLowerPDF;
82 if (fGraph) delete fGraph;
84 // delete fKernelFunction if it is not user defined
86 delete fKernelFunction;
87}
88
89////////////////////////////////////////////////////////////////////
90//// Internal function to instantiate a TKDE object
91void TKDE::Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t* data, const Double_t* dataWeights, Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho) {
92
93 fPDF = nullptr;
94 fKernelFunction = nullptr;
95 fUpperPDF = nullptr;
96 fLowerPDF = nullptr;
97 fApproximateBias = nullptr;
98 fGraph = nullptr;
99 fNewData = false;
100 fUseMirroring = false; fMirrorLeft = false; fMirrorRight = false;
101 fAsymLeft = false; fAsymRight = false;
102 fNBins = events < 10000 ? 1000 : std::min(10000, int(events / 100)*10);
103 fNEvents = events;
104 fUseBinsNEvents = 10000;
105 fMean = 0.0;
106 fSigma = 0.0;
107 fXMin = xMin;
108 fXMax = xMax;
110 fSumOfCounts = 0;
112 fRho = rho;
113 fWeightSize = 0;
114 fCanonicalBandwidths = std::vector<Double_t>(kTotalKernels, 0.0);
115 fKernelSigmas2 = std::vector<Double_t>(kTotalKernels, -1.0);
116 fSettedOptions = std::vector<Bool_t>(4, kFALSE);
117 SetOptions(option, rho);
119 SetMirror();
120 SetUseBins();
121 SetData(data, dataWeights);
122 SetKernelFunction(kernfunc);
123
124}
125
127 //Sets User defined construction options
128 TString opt = option;
129 opt.ToLower();
130 std::string options = opt.Data();
131 size_t numOpt = 4;
132 std::vector<std::string> voption(numOpt, "");
133 for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end() && !options.empty(); ++it) {
134 size_t pos = options.find_last_of(';');
135 if (pos == std::string::npos) {
136 *it = options;
137 break;
138 }
139 *it = options.substr(pos + 1);
140 options = options.substr(0, pos);
141 }
142 for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end(); ++it) {
143 size_t pos = (*it).find(':');
144 if (pos != std::string::npos) {
145 GetOptions((*it).substr(0, pos) , (*it).substr(pos + 1));
146 }
147 }
149 fRho = rho;
150}
151
152void TKDE::SetDrawOptions(const Option_t* option, TString& plotOpt, TString& drawOpt) {
153 // Sets User defined drawing options
154 size_t numOpt = 2;
155 std::string options = TString(option).Data();
156 std::vector<std::string> voption(numOpt, "");
157 for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end() && !options.empty(); ++it) {
158 size_t pos = options.find_last_of(';');
159 if (pos == std::string::npos) {
160 *it = options;
161 break;
162 }
163 *it = options.substr(pos + 1);
164 options = options.substr(0, pos);
165 }
166 Bool_t foundPlotOPt = kFALSE;
167 Bool_t foundDrawOPt = kFALSE;
168 for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end() && !options.empty(); ++it) {
169 size_t pos = (*it).find(':');
170 if (pos == std::string::npos) break;
171 TString optionType = (*it).substr(0, pos);
172 TString optionInstance = (*it).substr(pos + 1);
173 optionType.ToLower();
174 optionInstance.ToLower();
175 if (optionType.Contains("plot")) {
176 foundPlotOPt = kTRUE;
177 if (optionInstance.Contains("estimate") || optionInstance.Contains("errors") || optionInstance.Contains("confidenceinterval"))
178 plotOpt = optionInstance;
179 else {
180 this->Warning("SetDrawOptions", "Unknown plotting option %s: setting to KDE estimate plot.",optionInstance.Data());
181 this->Info("SetDrawOptions", "Possible plotting options are: Estimate,Errors,ConfidenceInterval");
182 }
183 } else if (optionType.Contains("drawoptions")) {
184 foundDrawOPt = kTRUE;
185 drawOpt = optionInstance;
186 }
187 }
188 if (!foundPlotOPt) {
189 this->Warning("SetDrawOptions", "No plotting option: setting to KDE estimate plot.");
190 plotOpt = "estimate";
191 }
192 if (!foundDrawOPt) {
193 this->Warning("SetDrawOptions", "No drawing options: setting to default ones.");
194 drawOpt = "apl4";
195 }
196}
197
198void TKDE::GetOptions(std::string optionType, std::string option) {
199 // Gets User defined KDE construction options
200 if (optionType == "kerneltype") {
202 if (option == "gaussian") {
204 } else if (option == "epanechnikov") {
206 } else if (option == "biweight") {
208 } else if (option == "cosinearch") {
210 } else if (option == "userdefined") {
212 } else {
213 this->Warning("GetOptions", "Unknown kernel type option %s: setting to Gaussian",option.c_str());
214 this->Info("GetOptions", "Possible kernel type options are: Gaussian, Epanechnikov, Biweight, Cosinearch, Userdefined");
216 }
217 } else if (optionType == "iteration") {
219 if (option == "adaptive") {
221 } else if (option == "fixed") {
223 } else {
224 this->Warning("GetOptions", "Unknown iteration option %s: setting to Adaptive",option.c_str());
225 this->Info("GetOptions","Possible iteration type options are: Adaptive, Fixed");
227 }
228 } else if (optionType == "mirror") {
230 if (option == "nomirror") {
232 } else if (option == "mirrorleft") {
234 } else if (option == "mirrorright") {
236 } else if (option == "mirrorboth") {
238 } else if (option == "mirrorasymleft") {
240 } else if (option == "mirrorrightasymleft") {
242 } else if (option == "mirrorasymright") {
244 } else if (option == "mirrorleftasymright") {
246 } else if (option == "mirrorasymboth") {
248 } else {
249 this->Warning("GetOptions", "Unknown mirror option %s: setting to NoMirror",option.c_str());
250 this->Info("GetOptions", "Possible mirror type options are: NoMirror, MirrorLeft, MirrorRight, MirrorAsymLeft,"
251 "MirrorAsymRight, MirrorRightAsymLeft, MirrorLeftAsymRight, MirrorAsymBoth");
253 }
254 } else if (optionType == "binning") {
256 if (option == "unbinned") {
258 } else if (option == "relaxedbinning") {
260 } else if (option == "forcedbinning") {
262 } else {
263 this->Warning("GetOptions", "Unknown binning option %s: setting to RelaxedBinning", option.c_str());
264 this->Info("GetOptions", "Possible binning type options are: Unbinned, ForcedBinning, RelaxedBinning");
266 }
267 }
268}
269
271 // Sets missing construction options to default ones
272 if (!fSettedOptions[0]) {
274 }
275 if (!fSettedOptions[1]) {
277 }
278 if (!fSettedOptions[2]) {
280 }
281 if (!fSettedOptions[3]) {
283 }
284}
285
286void TKDE::CheckOptions(Bool_t isUserDefinedKernel) {
287 // Sets User global options
288 if (!(isUserDefinedKernel) && !(fKernelType >= kGaussian && fKernelType < kUserDefined)) {
289 Error("CheckOptions", "Illegal user kernel type input! Use template constructor for user defined kernel.");
290 }
291 if (fIteration != kAdaptive && fIteration != kFixed) {
292 Warning("CheckOptions", "Illegal user iteration type input - use default value !");
294 }
295 if (!(fMirror >= kNoMirror && fMirror <= kMirrorAsymBoth)) {
296 Warning("CheckOptions", "Illegal user mirroring type input - use default value !");
298 }
299 if (!(fBinning >= kUnbinned && fBinning <= kForcedBinning)) {
300 Warning("CheckOptions", "Illegal user binning type input - use default value !");
302 }
303 if (fRho <= 0.0) {
304 Warning("CheckOptions", "Tuning factor rho cannot be non-positive - use default value !");
305 fRho = 1.0;
306 }
307}
308
310 // Sets User option for the choice of kernel estimator
312 delete fKernelFunction;
313 fKernelFunction = nullptr;
314 }
315 fKernelType = kern;
316 CheckOptions();
317 // resetting fKernel automatically calls ReInit when needed
318 fKernel.reset();
319}
320
322 // Sets User option for fixed or adaptive iteration
323 fIteration = iter;
324 CheckOptions();
325 fKernel.reset();
326}
327
329 // Sets User option for mirroring the data
330 fMirror = mir;
331 CheckOptions();
332 SetMirror();
333 if (fUseMirroring) {
335 }
336 fKernel.reset();
337}
338
340 // Sets User option for binning the weights
341 fBinning = bin;
342 CheckOptions();
343 SetUseBins();
344}
345
347 // Sets User option for number of bins
348 if (!nbins) {
349 Error("SetNBins", "Number of bins must be greater than zero.");
350 return;
351 }
352
353 fNBins = nbins;
354
355 SetUseBins();
356 if (!fUseBins) {
357 if (fBinning == kUnbinned)
358 Warning("SetNBins", "Bin type using SetBinning must be set for using a binned evaluation");
359 else
360 Warning("SetNBins", "Bin type using SetBinning or with SetUseBinsNEvents must be set for using a binned evaluation");
361 }
362}
363
365 // Sets User option for the minimum number of events for allowing automatic binning
366 fUseBinsNEvents = nEvents;
367 SetUseBins();
368}
369
371 // Factor which can be used to tune the smoothing.
372 // It is used as multiplicative factor for the fixed and adaptive bandwidth.
373 // A value < 1 will reproduce better the tails but oversmooth the peak
374 // while a factor > 1 will overestimate the tail
375 fRho = rho;
376 CheckOptions();
377 fKernel.reset();
378}
379
381 // Sets minimum range value and maximum range value
382 if (xMin >= xMax) {
383 Error("SetRange", "Minimum range cannot be bigger or equal than the maximum range! Present range values remain the same.");
384 return;
385 }
386 fXMin = xMin;
387 fXMax = xMax;
388 fUseMinMaxFromData = false;
389 fKernel.reset();
390}
391
392// private methods
393
395 // Sets User option for using binned weights
396 switch (fBinning) {
397 default:
398 case kRelaxedBinning:
399 if (fNEvents >= fUseBinsNEvents) {
400 fUseBins = kTRUE;
401 } else {
403 }
404 break;
405 case kForcedBinning:
406 fUseBins = kTRUE;
407 break;
408 case kUnbinned:
410 }
411
412 // during initialization we don't need to recompute the bins
413 // it is done within TKDE::SetData
414 // in this case fEvents is empty
415 if (fEvents.empty()) return;
416
417 // in case we need to recompute bins structure
418 // needed when called from the TKDE setters method
419 if (fUseBins) {
423 }
424 else {
425 // unbinned case
426 fWeightSize = 0.;
427 fBinCount.clear();
428 fData = fEvents;
429 }
430 fKernel.reset();
431}
432
434 // Sets the mirroring
440}
441
442void TKDE::SetData(const Double_t* data, const Double_t* wgts) {
443 // Sets the data events input sample or bin centres for binned option and computes basic estimators
444 if (!data) {
445 if (fNEvents) fData.reserve(fNEvents);
446 return;
447 }
448 fEvents.assign(data, data + fNEvents);
449 if (wgts) fEventWeights.assign(wgts, wgts + fNEvents);
450
451 if (fUseMinMaxFromData) {
452 fXMin = *std::min_element(fEvents.begin(), fEvents.end());
453 fXMax = *std::max_element(fEvents.begin(), fEvents.end());
454 }
455
456 if (fUseBins) {
457 if (fNBins >= fNEvents) {
458 this->Warning("SetData", "Default number of bins is greater or equal to number of events. Use SetNBins(UInt_t) to set the appropriate number of bins");
459 }
462 } else {
463 fWeightSize = 0; //fNEvents / (fXMax - fXMin);
464 fData = fEvents;
465 }
466
468 if (fUseMirroring) {
469 // set mirror events called automatically SetBinCount
471 }
472 else {
473 // to set fBinCount and fSumOfCounts
475 }
476}
477
479 // re-initialize the KDE in case of new data or in case of reading from a file
480
481 // in case of new data re-compute the statistics (works only for unbinned case)
482 if (fNewData) {
484 return;
485 }
486
487 if (fEvents.empty()) {
488 Error("ReInit","TKDE does not contain any data !");
489 return;
490 }
491
492 // when of reading from a file fKernelFunction is a nullptr
493 // we need to recreate Kernel class (with adaptive weights if needed) and
494 // recreate kernel function pointer
495 if (!fKernelFunction) SetKernelFunction(nullptr);
496
497 SetKernel();
498}
499
501 // re-initialize when new data have been filled in TKDE
502 // re-compute kernel quantities and statistics
503 if (fUseBins) {
504 Error("InitFromNewData","Re-felling is not supported with binning");
505 return;
506 }
507
508 fNewData = false;
509 // in case of unbinned data
510 if (!fUseBins) fEvents = fData;
511 if (fUseMinMaxFromData) {
512 fXMin = *std::min_element(fEvents.begin(), fEvents.end());
513 fXMax = *std::max_element(fEvents.begin(), fEvents.end());
514 }
516 //
518 if (fUseMirroring) {
520 }
521 // in case of I/O reset the kernel
522}
523
524///////////////////////////////////////////////////////////////////////////////////////////////////////
525/// Intgernal function to mirror the data
527 assert(fUseMirroring);
528 if (fUseBins) {
529 // binned case
530 // first fill the bin data in [fXmin,fXmax]
531 // bin center should have been already filled before
534
535 // now mirror the bins
536 assert(fNBins == fData.size());
537 if (fMirrorLeft) {
538 fData.insert(fData.begin(), fNBins, 0.);
539 fBinCount.insert(fBinCount.begin(), fNBins, 0.);
540 for (UInt_t i = 0; i < fNBins; ++i) {
541 fData[i] = fData[i + fNBins] - (fXMax - fXMin);
542 fBinCount[fNBins - i - 1] = fBinCount[i + fNBins];
543 }
544 }
545 if (fMirrorRight) {
546 fData.insert(fData.end(), fNBins, 0.);
547 fBinCount.insert(fBinCount.end(), fNBins, 0.);
548 int i0 = (fMirrorLeft) ? fNBins : 0;
549 int iLast = i0 + 2 * fNBins - 1;
550 for (UInt_t i = 0; i < fNBins; ++i) {
551 fData[i0 + fNBins + i] = fData[i0 + i] + (fXMax - fXMin);
552 fBinCount[iLast - i] = fBinCount[i0 + i];
553 }
554 }
555 }
556 else {
557 // unbinned case (transform events)
558 std::vector<Double_t> originalEvents = fEvents;
559 std::vector<Double_t> originalWeights = fEventWeights;
560 if (fMirrorLeft) {
561 fEvents.resize(2 * fNEvents, 0.0);
562 transform(fEvents.begin(), fEvents.begin() + fNEvents, fEvents.begin() + fNEvents,
563 std::bind(std::minus<Double_t>(), 2 * fXMin, std::placeholders::_1));
564 }
565 if (fMirrorRight) {
566 fEvents.resize((fMirrorLeft + 2) * fNEvents, 0.0);
567 transform(fEvents.begin(), fEvents.begin() + fNEvents, fEvents.begin() + (fMirrorLeft + 1) * fNEvents,
568 std::bind(std::minus<Double_t>(), 2 * fXMax, std::placeholders::_1));
569 }
570 if (!fEventWeights.empty() && (fMirrorLeft || fMirrorRight)) {
571 // copy weights too
572 fEventWeights.insert(fEventWeights.end(), fEventWeights.begin(), fEventWeights.begin() + fNEvents);
574 fEventWeights.insert(fEventWeights.end(), fEventWeights.begin(), fEventWeights.begin() + fNEvents);
575 }
576
577 fData = fEvents;
579 fEvents = originalEvents;
580 fEventWeights = originalWeights;
581 }
582
583}
584
586 // Computes input data's mean
587 fMean = std::accumulate(fEvents.begin(), fEvents.end(), 0.0) / fEvents.size();
588}
589
591 // Computes input data's sigma
592 fSigma = std::sqrt(1. / (fEvents.size() - 1.) * (std::inner_product(fEvents.begin(), fEvents.end(), fEvents.begin(), 0.0) - fEvents.size() * std::pow(fMean, 2.)));
593 fSigmaRob = std::min(fSigma, R / 1.349); // Sigma's robust estimator
594}
595
597 // Sets the kernel density estimator
598 // n here should be fNEvents or the total size including the mirrored events ???
599 // it should not be fData.size() that in binned case is number of bins
601 if (n == 0) return;
602 // Optimal bandwidth (Silverman's rule of thumb with assumed Gaussian density)
603 Double_t weight = fCanonicalBandwidths[kGaussian] * fSigmaRob * std::pow(3. / (8. * std::sqrt(M_PI)) * n, -0.2);
605
606 fKernel = std::make_unique<TKernel>(weight, this);
607
608 if (fIteration == kAdaptive) {
609 fKernel->ComputeAdaptiveWeights();
610 }
611 if (gDebug) {
612 if (fIteration != kAdaptive)
613 Info("SetKernel",
614 "Using a fix kernel - bandwidth = %f - using n = %d, rho = %f , sigmaRob = %f , mean = %f , sigma = %f "
615 ", canonicalBandwidth= %f",
617 else
618 Info("SetKernel",
619 "Using an adaptive kernel - weight = %f - using n = %d, rho = %f , sigmaRob = %f , mean = %f , sigma = %f "
620 ", canonicalBandwidth= %f",
622 }
623}
624
626 // sets the kernel function pointer. It creates and manages the pointer for internal classes
627 if (fKernelFunction) {
628 // kernel function pointer must be set to null before calling SetKernelFunction to avoid memory leaks
629 Fatal("SetKernelFunction", "Kernel function pointer is not null");
630 }
631 switch (fKernelType) {
632 case kGaussian :
634 break;
635 case kEpanechnikov :
637 break;
638 case kBiweight :
640 break;
641 case kCosineArch :
643 break;
644 case kUserDefined :
645 fKernelFunction = kernfunc;
647 break;
648 case kTotalKernels :
649 default:
650 /// for user defined kernels
651 fKernelFunction = kernfunc;
653 }
654
655 if (fKernelType == kUserDefined) {
656 if (fKernelFunction) {
660 }
661 else {
662 Error("SetKernelFunction", "User kernel function is not defined !");
663 return;
664 }
665 }
666 assert(fKernelFunction);
669 SetKernel();
670}
671
673 // Sets the canonical bandwidths according to the kernel type
674 fCanonicalBandwidths[kGaussian] = 0.7764; // Checked in Mathematica
675 fCanonicalBandwidths[kEpanechnikov] = 1.7188; // Checked in Mathematica
676 fCanonicalBandwidths[kBiweight] = 2.03617; // Checked in Mathematica
677 fCanonicalBandwidths[kCosineArch] = 1.7663; // Checked in Mathematica
678 fCanonicalBandwidths[kUserDefined] = 1.0; // To be Checked
679}
680
682 // Sets the kernel sigmas2 according to the kernel type
684 fKernelSigmas2[kEpanechnikov] = 1.0 / 5.0;
685 fKernelSigmas2[kBiweight] = 1.0 / 7.0;
686 fKernelSigmas2[kCosineArch] = 1.0 - 8.0 / std::pow(M_PI, 2);
687}
688
690 // Returns the PDF estimate as a function sampled in npx between xMin and xMax
691 // the KDE is not re-normalized to the xMin/xMax range.
692 // The user manages the returned function
693 // For getting a non-sampled TF1, one can create a TF1 directly from the TKDE by doing
694 // TF1 * f1 = new TF1("f1",kde,xMin,xMax,0);
695 return GetKDEFunction(npx,xMin,xMax);
696}
697
698TF1* TKDE::GetUpperFunction(Double_t confidenceLevel, UInt_t npx, Double_t xMin, Double_t xMax) {
699 // Returns the PDF upper estimate (upper confidence interval limit)
700 return GetPDFUpperConfidenceInterval(confidenceLevel,npx,xMin,xMax);
701}
702
703TF1* TKDE::GetLowerFunction(Double_t confidenceLevel, UInt_t npx, Double_t xMin, Double_t xMax) {
704 // Returns the PDF lower estimate (lower confidence interval limit)
705 return GetPDFLowerConfidenceInterval(confidenceLevel,npx,xMin,xMax);
706}
707
709 // Returns the PDF estimate bias
710 return GetKDEApproximateBias(npx,xMin,xMax);
711}
712
714 // Fills data member with User input data event for the unbinned option
715 if (fUseBins) {
716 this->Warning("Fill", "Cannot fill data with data binned option. Data input ignored.");
717 return;
718 }
719 fData.push_back(data);
720 fNEvents++;
721 fNewData = kTRUE;
722}
723
725 // Fills data member with User input data event for the unbinned option
726 if (fUseBins) {
727 this->Warning("Fill", "Cannot fill data with data binned option. Data input ignored.");
728 return;
729 }
730 fData.push_back(data); // should not be here fEvent ??
731 fEventWeights.push_back(weight);
732 fNEvents++;
733 fNewData = kTRUE;
734}
735
737 // The class's unary function: returns the kernel density estimate
738 return (*this)(*x);
739}
740
742 // The class's unary function: returns the kernel density estimate
743 if (!fKernel) {
744 (const_cast<TKDE*>(this))->ReInit();
745 // in case of failed re-initialization
746 if (!fKernel) return TMath::QuietNaN();
747 }
748 return (*fKernel)(x);
749}
750
752 // return the mean of the data
753 if (fNewData) (const_cast<TKDE*>(this))->InitFromNewData();
754 return fMean;
755}
756
758 // return the standard deviation of the data
759 if (fNewData) (const_cast<TKDE*>(this))->InitFromNewData();
760 return fSigma;
761}
762
764 // Returns the Root Asymptotic Mean Integrated Squared Error according to Silverman's rule of thumb with assumed Gaussian density
765 Double_t result = 5. / 4. * fKernelSigmas2[fKernelType] * std::pow(fCanonicalBandwidths[fKernelType], 4) * std::pow(3. / (8. * std::sqrt(M_PI)) , -0.2) * fSigmaRob * std::pow(fNEvents, -0.8);
766 return std::sqrt(result);
767}
768
770// Internal class constructor
771fKDE(kde),
772fNWeights(kde->fData.size()),
773fWeights(1, weight)
774{}
775
777 // Gets the adaptive weights (bandwidths) for TKernel internal computation
778 unsigned int n = fKDE->fData.size();
779 Double_t minWeight = fWeights[0] * 0.05;
780 // we will store computed adaptive weights in weights
781 std::vector<Double_t> weights(n, fWeights[0]);
782 bool useDataWeights = (fKDE->fBinCount.size() == n);
783 Double_t f = 0.0;
784 for (unsigned int i = 0; i < n; ++i) {
785 // for negative or null bin contents use the fixed weight value (fWeights[0])
786 if (useDataWeights && fKDE->fBinCount[i] <= 0) {
787 weights[i] = fWeights[0];
788 continue; // skip negative or null weights
789 }
790 f = (*fKDE->fKernel)(fKDE->fData[i]);
791 if (f <= 0) {
792 // this can happen when data are outside range and fAsymLeft or fAsymRight is on
793 fKDE->Warning("ComputeAdativeWeights","function value is zero or negative for x = %f w = %f - set their bandwidth to zero",
794 fKDE->fData[i],(useDataWeights) ? fKDE->fBinCount[i] : 1.);
795 // set bandwidth for these points to zero.
796 weights[i] = 0;
797 continue;
798 }
799 // use weight if it is larger
800 weights[i] = std::max(weights[i] /= std::sqrt(f), minWeight);
801 fKDE->fAdaptiveBandwidthFactor += std::log(f);
802 // printf("(f = %f w = %f af = %f ),",f,*weight,fKDE->fAdaptiveBandwidthFactor);
803 }
804 Double_t kAPPROX_GEO_MEAN = 0.241970724519143365; // 1 / TMath::Power(2 * TMath::Pi(), .5) * TMath::Exp(-.5). Approximated geometric mean over pointwise data (the KDE function is substituted by the "real Gaussian" pdf) and proportional to sigma. Used directly when the mirroring is enabled, otherwise computed from the data
805 // not sure for this special case for mirror. This results in a much smaller bandwidth for mirror case
806 fKDE->fAdaptiveBandwidthFactor = fKDE->fUseMirroring ? kAPPROX_GEO_MEAN / fKDE->fSigmaRob : std::sqrt(std::exp(fKDE->fAdaptiveBandwidthFactor / fKDE->fData.size()));
807 // set adaptive weights in fWeights matrix
808 fWeights.resize(n);
809 transform(weights.begin(), weights.end(), fWeights.begin(),
810 std::bind(std::multiplies<Double_t>(), std::placeholders::_1, fKDE->fAdaptiveBandwidthFactor));
811 //printf("adaptive bandwidth factor % f weight 0 %f , %f \n",fKDE->fAdaptiveBandwidthFactor, weights[0],fWeights[0] );
812}
813
815 // Returns the bandwidth
816 return fWeights[fKDE->Index(x)];
817}
818
820 // Returns the bins' centres from the data for using with the binned option
821 fData.assign(fNBins, 0.0);
822 Double_t binWidth((xmax - xmin) / fNBins);
823 for (UInt_t i = 0; i < fNBins; ++i) {
824 fData[i] = xmin + (i + 0.5) * binWidth;
825 }
826}
827
829 // Returns the bins' count from the data for using with the binned option
830 // or set the bin count to the weights in case of weighted data
831 if (fUseBins) {
832 fBinCount.assign(fNBins, 0.0);
833 fSumOfCounts = 0;
834 // note this function should be called before the data have been mirrored
835 UInt_t nevents = fNEvents;
836 assert(fEvents.size() == nevents);
837 // case of weighted events
838 if (!fEventWeights.empty() ) {
839 assert(nevents == fEventWeights.size());
840 for (UInt_t i = 0; i < nevents; ++i) {
841 if (fEvents[i] >= fXMin && fEvents[i] < fXMax) {
844 //printf("sum of counts %f - bin count %d - %f \n",fSumOfCounts, Index(fEvents[i]), fBinCount[Index(fEvents[i])] );
845 }
846 }
847 }
848 // case of unweighted data
849 else {
850 for (UInt_t i = 0; i < nevents; ++i) {
851 if (fEvents[i] >= fXMin && fEvents[i] < fXMax) {
852 fBinCount[Index(fEvents[i])] += 1;
853 fSumOfCounts += 1;
854 }
855 }
856 }
857 }
858 else if (!fEventWeights.empty() ) {
859 //unbinned weighted data
861 fSumOfCounts = 0;
862 for (UInt_t i = 0; i < fNEvents; ++i) {
863 if (fEvents[i] >= fXMin && fEvents[i] < fXMax) {
865 }
866 }
867 }
868 else {
869 // unbinned unweighted data
870 //fSumOfCounts = fNEvents;
871 fSumOfCounts = 0;
872 for (UInt_t i = 0; i < fNEvents; ++i) {
873 if (fEvents[i] >= fXMin && fEvents[i] < fXMax) {
874 fSumOfCounts += 1;
875 }
876 }
877 fBinCount.clear();
878 }
879}
880
881////////////////////////////////////////////////////////////////////////////////
882/// Draws either the KDE functions or its errors
883// @param opt : Drawing options:
884// - "" (default) - draw just the kde
885// - "same" draw on top of existing pad
886// - "Errors" draw a TGraphErrors with the point and errors
887// -"confidenceinterval" draw KDE + conf interval functions (default is 95%)
888// -"confidenceinterval@0.90" draw KDE + conf interval functions at 90%
889// - Extra options can be passed in opt for the drawing of the corresponding TF1 or TGraph
890//
891// NOTE: The functions GetDrawnFunction(), GetDrawnUpperFunction(), GetDrawnLowerFunction()
892// and GetGraphWithErrors() return the corresponding drawn objects (which are managed by the TKDE)
893// They can be used to changes style, color, etc..
894////
895void TKDE::Draw(const Option_t* opt) {
896
897 TString plotOpt = opt;
898 plotOpt.ToLower();
899 TString drawOpt = plotOpt;
900 if(gPad && !plotOpt.Contains("same")) {
901 gPad->Clear();
902 }
903 if (plotOpt.Contains("errors")) {
904 drawOpt.ReplaceAll("errors","");
905 DrawErrors(drawOpt);
906 }
907 else if (plotOpt.Contains("confidenceinterval") ||
908 plotOpt.Contains("confinterval")) {
909 // parse level option
910 drawOpt.ReplaceAll("confidenceinterval","");
911 drawOpt.ReplaceAll("confinterval","");
912 Double_t level = 0.95;
913 const char * s = strstr(plotOpt.Data(),"interval@");
914 // coverity [secure_coding : FALSE]
915 if (s != nullptr) sscanf(s,"interval@%lf",&level);
916 if((level <= 0) || (level >= 1)) {
917 Warning("Draw","given confidence level %.3lf is invalid - use default 0.95",level);
918 level = 0.95;
919 }
920 DrawConfidenceInterval(drawOpt,level);
921 }
922 else {
923 if (fPDF) delete fPDF;
925 fPDF->Draw(drawOpt);
926 }
927}
928
929///////////////////////////////////////////////////////////////////////
930/// Draws a TGraphErrors with KDE values and errors
932{
933 if (fGraph) delete fGraph;
935 fGraph->Draw(drawOpt.Data());
936}
937///////////////////////////////////////////////////////////////////////
938/// return a TGraphErrors with the KDE values and errors
939/// The return object is managed by the user
941 if (xmin>= xmax) { xmin = fXMin; xmax = fXMax; }
942 UInt_t n = npx;
943 Double_t* x = new Double_t[n + 1];
944 Double_t* ex = new Double_t[n + 1];
945 Double_t* y = new Double_t[n + 1];
946 Double_t* ey = new Double_t[n + 1];
947 for (UInt_t i = 0; i <= n; ++i) {
948 x[i] = xmin + i * (xmax - xmin) / n;
949 y[i] = (*this)(x[i]);
950 ex[i] = 0;
951 ey[i] = this->GetError(x[i]);
952 }
953 TGraphErrors* ge = new TGraphErrors(n, &x[0], &y[0], &ex[0], &ey[0]);
954 ge->SetName("kde_graph_error");
955 ge->SetTitle("Errors");
956 delete [] x;
957 delete [] ex;
958 delete [] y;
959 delete [] ey;
960 return ge;
961}
962
963//////////////////////////////////////////////////////////////
964/// // Draws the KDE and its confidence interval
965void TKDE::DrawConfidenceInterval(TString& drawOpt,double cl) {
966 GetKDEFunction()->Draw(drawOpt.Data());
968 upper->SetLineColor(kBlue);
969 upper->Draw(("same" + drawOpt).Data());
971 lower->SetLineColor(kRed);
972 lower->Draw(("same" + drawOpt).Data());
973 if (fUpperPDF) delete fUpperPDF;
974 if (fLowerPDF) delete fLowerPDF;
975 fUpperPDF = upper;
976 fLowerPDF = lower;
977}
978
980 // Returns the bandwidth for the non adaptive KDE
981 Double_t result = -1.;
983 this->Warning("GetFixedWeight()", "Fixed iteration option not enabled. Returning %f.", result);
984 } else {
985 result = fKernel->GetFixedWeight();
986 }
987 return result;
988}
989
991 // Returns the bandwidths for the adaptive KDE
993 this->Warning("GetFixedWeight()", "Adaptive iteration option not enabled. Returning a NULL pointer<");
994 return nullptr;
995 }
996 if (fNewData) (const_cast<TKDE*>(this))->InitFromNewData();
997 return &(fKernel->GetAdaptiveWeights()).front();
998}
999
1001 // Returns the bandwidth for the non adaptive KDE
1002 return fWeights[0];
1003}
1004
1005const std::vector<Double_t> & TKDE::TKernel::GetAdaptiveWeights() const {
1006 // Returns the bandwidth for the adaptive KDE
1007 return fWeights;
1008}
1009
1011 // The internal class's unary function: returns the kernel density estimate
1012 Double_t result(0.0);
1013 UInt_t n = fKDE->fData.size();
1014 // case of bins or weighted data
1015 Bool_t useCount = (fKDE->fBinCount.size() == n);
1016 // also in case of unbinned unweighted data fSumOfCounts is sum of events in range
1017 // events outside range should be used to normalize the TKDE ??
1018 Double_t nSum = fKDE->fSumOfCounts; //(useBins) ? fKDE->fSumOfCounts : fKDE->fNEvents;
1019 //if (!useCount) nSum = fKDE->fNEvents;
1020 // in case of non-adaptive fWeights is a vector of size 1
1021 Bool_t hasAdaptiveWeights = (fWeights.size() == n);
1022 Double_t invWeight = (!hasAdaptiveWeights) ? 1. / fWeights[0] : 0;
1023 for (UInt_t i = 0; i < n; ++i) {
1024 Double_t binCount = (useCount) ? fKDE->fBinCount[i] : 1.0;
1025 // uncommenting following line slows down so keep computation for
1026 // zero bincounts
1027 //if (binCount <= 0) continue;
1028 if (hasAdaptiveWeights) {
1029 // skip data points that have 0 bandwidth (this can happen, see TKernel::ComputeAdaptiveWeight)
1030 if (fWeights[i] == 0) continue;
1031 invWeight = 1. / fWeights[i];
1032 }
1033 result += binCount * invWeight * (*fKDE->fKernelFunction)((x - fKDE->fData[i]) * invWeight );
1034 if (fKDE->fAsymLeft) {
1035 result += binCount * invWeight * (*fKDE->fKernelFunction)((x - (2. * fKDE->fXMin - fKDE->fData[i])) * invWeight);
1036 }
1037 if (fKDE->fAsymRight) {
1038 result += binCount * invWeight * (*fKDE->fKernelFunction)((x - (2. * fKDE->fXMax - fKDE->fData[i])) * invWeight);
1039 }
1040 // printf("data point %i %f %f count %f weight % f result % f\n",i,fKDE->fData[i],fKDE->fEvents[i],binCount,fWeights[i], result);
1041 }
1042 if ( TMath::IsNaN(result) ) {
1043 fKDE->Warning("operator()","Result is NaN for x %f \n",x);
1044 }
1045 return result / nSum;
1046}
1047
1048////////////////////////////////////////////////////
1049/// compute the bin index given a data point x
1051 // Returns the indices (bins) for the data point
1052 // fWeightSize is the inverse of the bin width
1053 Int_t bin = Int_t((x - fXMin) * fWeightSize);
1054 if (bin == (Int_t)fData.size()) return --bin;
1055 if (bin > (Int_t)fData.size()) {
1056 return (Int_t)(fData.size()) - 1;
1057 } else if (bin <= 0) {
1058 return 0;
1059 }
1060 return bin;
1061}
1062
1064 // Returns the pointwise upper estimated density
1065 Double_t f = (*this)(x);
1067 Double_t prob = 1. - (1.-*p)/2; // this is 1.-alpha/2
1069 return f + z * sigma;
1070}
1071
1073 // Returns the pointwise lower estimated density
1074 Double_t f = (*this)(x);
1076 Double_t prob = (1.-*p)/2; // this is alpha/2
1078 return f + z * sigma;
1079}
1080
1081
1083 // Returns the pointwise approximate estimated density bias
1086 rd.SetFunction(kern);
1087 Double_t df2 = rd.Derivative2(x);
1088 Double_t weight = fKernel->GetWeight(x); // Bandwidth
1089 return 0.5 * fKernelSigmas2[fKernelType] * std::pow(weight, 2) * df2;
1090}
1092 // Returns the pointwise sigma of estimated density
1093 Double_t kernelL2Norm = ComputeKernelL2Norm();
1094 Double_t f = (*this)(x);
1095 Double_t weight = fKernel->GetWeight(x); // Bandwidth
1096 Double_t result = f * kernelL2Norm / (fNEvents * weight);
1097 return std::sqrt(result);
1098}
1099
1101 // Checks if kernel has unit integral, mu = 0 and positive finite sigma conditions
1102 Double_t valid = kTRUE;
1104 valid = valid && unity == 1.;
1105 if (!valid) {
1106 Error("CheckKernelValidity", "Kernel's integral is %f",unity);
1107 }
1109 valid = valid && mu == 0.;
1110 if (!valid) {
1111 Error("CheckKernelValidity", "Kernel's mu is %f" ,mu);
1112 }
1113 Double_t sigma2 = ComputeKernelSigma2();
1114 valid = valid && sigma2 > 0 && sigma2 != std::numeric_limits<Double_t>::infinity();
1115 if (!valid) {
1116 Error("CheckKernelValidity", "Kernel's sigma2 is %f",sigma2);
1117 }
1118 if (!valid) {
1119 Error("CheckKernelValidity", "Validation conditions: the kernel's integral must be 1, the kernel's mu must be zero and the kernel's sigma2 must be finite positive to be a suitable kernel.");
1120 //exit(EXIT_FAILURE);
1121 }
1122}
1123
1125 // Computes the kernel's L2 norm
1128 ig.SetFunction(kernel);
1129 Double_t result = ig.Integral();
1130 return result;
1131}
1132
1134 // Computes the kernel's sigma squared
1137 ig.SetFunction(kernel);
1138 Double_t result = ig.Integral();
1139 return result;
1140}
1141
1143 // Computes the kernel's mu
1146 ig.SetFunction(kernel);
1147 Double_t result = ig.Integral();
1148 return result;
1149}
1150
1152 // Computes the kernel's integral which ought to be unity
1155 ig.SetFunction(kernel);
1156 Double_t result = ig.Integral();
1157 return result;
1158}
1159
1160//////////////////////////////////////////////////////////////////////////////////////////////////////////////
1161/// Internal function to compute statistics (mean,stddev) using always all the provided data (i.e. no binning)
1163 /// in case of weights use
1164 if (!fEventWeights.empty() ) {
1165 // weighted data
1166 double x1 = fXMin - 0.001*(fXMax-fXMin);
1167 double x2 = fXMax + 0.001*(fXMax-fXMin);
1168 TH1D h1("temphist","", 500, x1, x2);
1169 h1.FillN(fEvents.size(), fEvents.data(), fEventWeights.data() );
1170 assert (h1.GetSumOfWeights() > 0) ;
1171 fMean = h1.GetMean();
1172 fSigma = h1.GetRMS();
1173 // compute robust sigma using midspread
1174 Double_t quantiles[2] = {0.0, 0.0};
1175 Double_t prob[2] = {0.25, 0.75};
1176 h1.GetQuantiles(2, quantiles, prob);
1177 Double_t midspread = quantiles[1] - quantiles[0];
1178 fSigmaRob = std::min(fSigma, midspread / 1.349); // Sigma's robust estimator
1179 //printf("weight case - stat: m = %f, s= %f, sr = %f \n",fMean, fSigma, midspread);
1180 return;
1181 }
1182 else {
1183 // compute statistics using the data
1184 SetMean();
1185 Double_t midspread = ComputeMidspread();
1186 SetSigma(midspread);
1187 //printf("un-weight case - stat: m = %f, s= %f, sr = %f \n",fMean, fSigma, fSigmaRob);
1188 }
1189}
1190
1192 // Computes the inter-quartile range from the data
1193 std::sort(fEvents.begin(), fEvents.end());
1194 Double_t quantiles[2] = {0.0, 0.0};
1195 Double_t prob[2] = {0.25, 0.75};
1196 TMath::Quantiles(fEvents.size(), 2, &fEvents[0], quantiles, prob);
1197 Double_t lowerquartile = quantiles[0];
1198 Double_t upperquartile = quantiles[1];
1199 return upperquartile - lowerquartile;
1200}
1201
1203 // Computes the user's input kernel function canonical bandwidth
1204 fCanonicalBandwidths[kUserDefined] = std::pow(ComputeKernelL2Norm() / std::pow(ComputeKernelSigma2(), 2), 1. / 5.);
1205}
1206
1208 // Computes the user's input kernel function sigma2
1210}
1211
1212TKDE::KernelIntegrand::KernelIntegrand(const TKDE* kde, EIntegralResult intRes) : fKDE(kde), fIntegralResult(intRes) {
1213 // Internal class constructor
1214}
1215
1217 // Internal class unary function
1218 if (fIntegralResult == kNorm) {
1219 return std::pow((*fKDE->fKernelFunction)(x), 2);
1220 } else if (fIntegralResult == kMu) {
1221 return x * (*fKDE->fKernelFunction)(x);
1222 } else if (fIntegralResult == kSigma2) {
1223 return std::pow(x, 2) * (*fKDE->fKernelFunction)(x);
1224 } else if (fIntegralResult == kUnitIntegration) {
1225 return (*fKDE->fKernelFunction)(x);
1226 } else {
1227 return -1;
1228 }
1229}
1230
1232 //Returns the estimated density
1233 TString name = "KDEFunc_"; name+= GetName();
1234 TString title = "KDE "; title+= GetTitle();
1235 if (xMin >= xMax) { xMin = fXMin; xMax = fXMax; }
1236 //Do not register the TF1 to global list
1237 bool previous = TF1::DefaultAddToGlobalList(kFALSE);
1238 TF1 * pdf = new TF1(name.Data(), this, xMin, xMax, 0);
1240 if (npx > 0) pdf->SetNpx(npx);
1241 pdf->SetTitle(title);
1242 return pdf;
1243}
1244
1246 // Returns the upper estimated density
1247 TString name;
1248 name.Form("KDE_UpperCL%f5.3_%s",confidenceLevel,GetName());
1249 if (xMin >= xMax) { xMin = fXMin; xMax = fXMax; }
1250 TF1 * upperPDF = new TF1(name, this, &TKDE::UpperConfidenceInterval, xMin, xMax, 1);
1251 upperPDF->SetParameter(0, confidenceLevel);
1252 if (npx > 0) upperPDF->SetNpx(npx);
1253 TF1 * f = (TF1*)upperPDF->Clone();
1254 delete upperPDF;
1255 return f;
1256}
1257
1259 // Returns the upper estimated density
1260 TString name;
1261 name.Form("KDE_LowerCL%f5.3_%s",confidenceLevel,GetName());
1262 if (xMin >= xMax) { xMin = fXMin; xMax = fXMax; }
1263 TF1 * lowerPDF = new TF1(name, this, &TKDE::LowerConfidenceInterval, xMin, xMax, 1);
1264 lowerPDF->SetParameter(0, confidenceLevel);
1265 if (npx > 0) lowerPDF->SetNpx(npx);
1266 TF1 * f = (TF1*)lowerPDF->Clone();
1267 delete lowerPDF;
1268 return f;
1269}
1270
1272 // Returns the approximate bias
1273 TString name = "KDE_Bias_"; name += GetName();
1274 if (xMin >= xMax) { xMin = fXMin; xMax = fXMax; }
1275 TF1 * approximateBias = new TF1(name, this, &TKDE::ApproximateBias, xMin, xMax, 0);
1276 if (npx > 0) approximateBias->SetNpx(npx);
1277 TF1 * f = (TF1*)approximateBias->Clone();
1278 delete approximateBias;
1279 return f;
1280}
#define f(i)
Definition RSha256.hxx:104
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define M_PI
Definition Rotated.cxx:105
int Int_t
Definition RtypesCore.h:45
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
constexpr Bool_t kTRUE
Definition RtypesCore.h:93
const char Option_t
Definition RtypesCore.h:66
#define ClassImp(name)
Definition Rtypes.h:382
@ kRed
Definition Rtypes.h:66
@ kBlue
Definition Rtypes.h:66
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t option
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
Int_t gDebug
Definition TROOT.cxx:597
#define gPad
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Definition IFunction.h:112
User Class for performing numerical integration of a function in one dimension.
Definition Integrator.h:98
void SetFunction(Function &f)
method to set the a generic integration function
Definition Integrator.h:492
double Integral(Function &f, double a, double b)
evaluate the Integral of a function f over the defined interval (a,b)
Definition Integrator.h:499
User class for calculating the derivatives of a function.
double Derivative2(double x)
Returns the second derivative of the function at point x, computed by Richardson's extrapolation meth...
void SetFunction(const IGenFunction &f)
Set function for derivative calculation (copy the function if option has been enabled in the construc...
Template class to wrap any C++ callable object which takes one argument i.e.
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition TAttLine.h:40
1-Dim function class
Definition TF1.h:233
void SetTitle(const char *title="") override
Set function title if title has the form "fffffff;xxxx;yyyy", it is assumed that the function title i...
Definition TF1.cxx:3558
virtual void SetNpx(Int_t npx=100)
Set the number of points used to draw the function.
Definition TF1.cxx:3433
void Draw(Option_t *option="") override
Draw this function with its current attributes.
Definition TF1.cxx:1333
static Bool_t DefaultAddToGlobalList(Bool_t on=kTRUE)
Static method to add/avoid to add automatically functions to the global list (gROOT->GetListOfFunctio...
Definition TF1.cxx:838
virtual void SetParameter(Int_t param, Double_t value)
Definition TF1.h:667
TObject * Clone(const char *newname=nullptr) const override
Make a complete copy of the underlying object.
Definition TF1.cxx:1064
A TGraphErrors is a TGraph with error bars.
void SetName(const char *name="") override
Set graph name.
Definition TGraph.cxx:2381
void Draw(Option_t *chopt="") override
Draw this graph with its current attributes.
Definition TGraph.cxx:831
void SetTitle(const char *title="") override
Change (i.e.
Definition TGraph.cxx:2397
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:670
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition TH1.cxx:7556
Double_t GetRMS(Int_t axis=1) const
This function returns the Standard Deviation (Sigma) of the distribution not the Root Mean Square (RM...
Definition TH1.h:319
virtual void FillN(Int_t ntimes, const Double_t *x, const Double_t *w, Int_t stride=1)
Fill this histogram with an array x and weights w.
Definition TH1.cxx:3447
virtual Int_t GetQuantiles(Int_t n, Double_t *xp, const Double_t *p=nullptr)
Compute Quantiles for this histogram Quantile x_p := Q(p) is defined as the value x_p such that the c...
Definition TH1.cxx:4611
virtual Double_t GetSumOfWeights() const
Return the sum of weights excluding under/overflows.
Definition TH1.cxx:7938
void ComputeAdaptiveWeights()
Definition TKDE.cxx:776
Double_t GetFixedWeight() const
Definition TKDE.cxx:1000
TKernel(Double_t weight, TKDE *kde)
Definition TKDE.cxx:769
Double_t GetWeight(Double_t x) const
Definition TKDE.cxx:814
Double_t operator()(Double_t x) const
Definition TKDE.cxx:1010
const std::vector< Double_t > & GetAdaptiveWeights() const
Definition TKDE.cxx:1005
Kernel Density Estimation class.
Definition TKDE.h:37
~TKDE() override
Definition TKDE.cxx:77
TF1 * GetPDFUpperConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition TKDE.cxx:1245
TF1 * GetKDEApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition TKDE.cxx:1271
void SetData(const Double_t *data, const Double_t *weights)
Definition TKDE.cxx:442
TF1 * fLowerPDF
Output Kernel Density Estimation upper confidence interval PDF function.
Definition TKDE.h:195
std::vector< Double_t > fKernelSigmas2
Definition TKDE.h:226
Double_t ComputeKernelL2Norm() const
Definition TKDE.cxx:1124
TF1 * fPDF
Definition TKDE.h:193
TF1 * GetPDFLowerConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition TKDE.cxx:1258
void SetKernelType(EKernelType kern)
Definition TKDE.cxx:309
std::vector< Double_t > fCanonicalBandwidths
Definition TKDE.h:225
UInt_t fNEvents
Data's number of events.
Definition TKDE.h:211
void ComputeDataStats()
Internal function to compute statistics (mean,stddev) using always all the provided data (i....
Definition TKDE.cxx:1162
Double_t fXMax
Data maximum value.
Definition TKDE.h:219
Double_t UpperConfidenceInterval(const Double_t *x, const Double_t *p) const
Valid if the bandwidth is small compared to nEvents**1/5.
Definition TKDE.cxx:1063
void ReInit()
Definition TKDE.cxx:478
Double_t ApproximateBias(const Double_t *x, const Double_t *) const
Definition TKDE.h:259
Double_t ComputeMidspread()
Definition TKDE.cxx:1191
Bool_t fNewData
Flag to control when new data are given.
Definition TKDE.h:207
void SetMirror()
Definition TKDE.cxx:433
Bool_t fUseMirroring
Definition TKDE.h:205
void DrawConfidenceInterval(TString &drawOpt, double cl=0.95)
// Draws the KDE and its confidence interval
Definition TKDE.cxx:965
void SetMirroredEvents()
Intgernal function to mirror the data.
Definition TKDE.cxx:526
EMirror fMirror
Definition TKDE.h:201
void SetUserCanonicalBandwidth()
Definition TKDE.cxx:1202
Bool_t fMirrorLeft
Definition TKDE.h:205
EIteration fIteration
Definition TKDE.h:200
void CheckKernelValidity()
Definition TKDE.cxx:1100
const Double_t * GetAdaptiveWeights() const
Definition TKDE.cxx:990
Double_t fAdaptiveBandwidthFactor
Geometric mean of the kernel density estimation from the data for adaptive iteration.
Definition TKDE.h:221
void SetKernelFunction(KernelFunction_Ptr kernfunc=nullptr)
Definition TKDE.cxx:625
Double_t LowerConfidenceInterval(const Double_t *x, const Double_t *p) const
Valid if the bandwidth is small compared to nEvents**1/5.
Definition TKDE.cxx:1072
Double_t fSigmaRob
Data std deviation (robust estimation)
Definition TKDE.h:217
std::vector< Double_t > fBinCount
Number of events per bin for binned data option.
Definition TKDE.h:228
void Draw(const Option_t *option="") override
Draws either the KDE functions or its errors.
Definition TKDE.cxx:895
EBinning fBinning
Definition TKDE.h:202
EIteration
Iteration types. They can be set using SetIteration()
Definition TKDE.h:52
@ kAdaptive
Definition TKDE.h:53
@ kFixed
Definition TKDE.h:54
Double_t GetRAMISE() const
Definition TKDE.cxx:763
void SetIteration(EIteration iter)
Definition TKDE.cxx:321
Double_t ComputeKernelIntegral() const
Definition TKDE.cxx:1151
Double_t CosineArchKernel(Double_t x) const
Returns the kernel evaluation at x.
Definition TKDE.h:254
Double_t fXMin
Data minimum value.
Definition TKDE.h:218
Double_t operator()(Double_t x) const
Definition TKDE.cxx:741
void SetUserKernelSigma2()
Definition TKDE.cxx:1207
Double_t GetBias(Double_t x) const
Definition TKDE.cxx:1082
std::vector< Double_t > fData
Data events.
Definition TKDE.h:189
Double_t fSumOfCounts
Data sum of weights.
Definition TKDE.h:212
UInt_t fUseBinsNEvents
If the algorithm is allowed to use automatic (relaxed) binning this is the minimum number of events t...
Definition TKDE.h:213
void SetKernel()
Definition TKDE.cxx:596
Double_t fSigma
Data std deviation.
Definition TKDE.h:216
TGraphErrors * GetGraphWithErrors(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
return a TGraphErrors with the KDE values and errors The return object is managed by the user
Definition TKDE.cxx:940
Double_t GetMean() const
Definition TKDE.cxx:751
Double_t fRho
Adjustment factor for sigma.
Definition TKDE.h:220
Bool_t fAsymRight
Definition TKDE.h:205
Double_t fWeightSize
Caches the weight size.
Definition TKDE.h:223
void SetUseBinsNEvents(UInt_t nEvents)
Definition TKDE.cxx:364
std::vector< Double_t > fEvents
Original data storage.
Definition TKDE.h:190
Double_t GetError(Double_t x) const
Definition TKDE.cxx:1091
TF1 * GetKDEFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition TKDE.cxx:1231
void SetBinning(EBinning)
Definition TKDE.cxx:339
void GetOptions(std::string optionType, std::string option)
Definition TKDE.cxx:198
Bool_t fAsymLeft
Definition TKDE.h:205
std::vector< Bool_t > fSettedOptions
User input options flag.
Definition TKDE.h:230
Double_t GaussianKernel(Double_t x) const
Returns the kernel evaluation at x.
Definition TKDE.h:239
void SetRange(Double_t xMin, Double_t xMax)
By default computed from the data.
Definition TKDE.cxx:380
void SetMean()
Definition TKDE.cxx:585
Double_t ComputeKernelSigma2() const
Definition TKDE.cxx:1133
void SetOptions(const Option_t *option, Double_t rho)
Definition TKDE.cxx:126
void SetUseBins()
Definition TKDE.cxx:394
Double_t GetFixedWeight() const
Definition TKDE.cxx:979
void AssureOptions()
Definition TKDE.cxx:270
TF1 * GetFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition TKDE.cxx:689
void SetBinCountData()
Definition TKDE.cxx:828
TF1 * GetUpperFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition TKDE.cxx:698
void InitFromNewData()
Definition TKDE.cxx:500
void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t *data, const Double_t *weight, Double_t xMin, Double_t xMax, const Option_t *option, Double_t rho)
Definition TKDE.cxx:91
void SetDrawOptions(const Option_t *option, TString &plotOpt, TString &drawOpt)
Definition TKDE.cxx:152
EMirror
Data "mirroring" option to address the probability "spill out" boundary effect They can be set using ...
Definition TKDE.h:59
@ kMirrorRightAsymLeft
Definition TKDE.h:65
@ kMirrorLeft
Definition TKDE.h:61
@ kMirrorAsymRight
Definition TKDE.h:66
@ kMirrorAsymBoth
Definition TKDE.h:68
@ kNoMirror
Definition TKDE.h:60
@ kMirrorRight
Definition TKDE.h:62
@ kMirrorLeftAsymRight
Definition TKDE.h:67
@ kMirrorBoth
Definition TKDE.h:63
@ kMirrorAsymLeft
Definition TKDE.h:64
TGraphErrors * fGraph
Output Kernel Density Estimation approximate bias.
Definition TKDE.h:197
void SetCanonicalBandwidths()
Definition TKDE.cxx:672
TKDE()
default constructor used only by I/O
Definition TKDE.cxx:61
void SetBinCentreData(Double_t xmin, Double_t xmax)
Definition TKDE.cxx:819
void SetTuneFactor(Double_t rho)
Definition TKDE.cxx:370
UInt_t Index(Double_t x) const
compute the bin index given a data point x
Definition TKDE.cxx:1050
void Fill(Double_t data)
Definition TKDE.cxx:713
TF1 * fUpperPDF
Output Kernel Density Estimation PDF function.
Definition TKDE.h:194
Bool_t fMirrorRight
Definition TKDE.h:205
UInt_t fNBins
Number of bins for binned data option.
Definition TKDE.h:210
Double_t ComputeKernelMu() const
Definition TKDE.cxx:1142
EKernelType
Types of Kernel functions They can be set using the function SetKernelType() or as a string in the co...
Definition TKDE.h:42
@ kGaussian
Definition TKDE.h:43
@ kEpanechnikov
Definition TKDE.h:44
@ kCosineArch
Definition TKDE.h:46
@ kBiweight
Definition TKDE.h:45
@ kTotalKernels
Internal use only for member initialization.
Definition TKDE.h:48
@ kUserDefined
Internal use only for the class's template constructor.
Definition TKDE.h:47
Double_t fMean
Data mean.
Definition TKDE.h:215
void DrawErrors(TString &drawOpt)
Draws a TGraphErrors with KDE values and errors.
Definition TKDE.cxx:931
void SetNBins(UInt_t nbins)
Definition TKDE.cxx:346
void CheckOptions(Bool_t isUserDefinedKernel=kFALSE)
Definition TKDE.cxx:286
Double_t EpanechnikovKernel(Double_t x) const
Definition TKDE.h:244
Double_t BiweightKernel(Double_t x) const
Returns the kernel evaluation at x.
Definition TKDE.h:249
void SetKernelSigmas2()
Definition TKDE.cxx:681
std::unique_ptr< TKernel > fKernel
! internal kernel class. Transient because it is recreated after reading from a file
Definition TKDE.h:187
Bool_t fUseMinMaxFromData
Flag top control if min and max must be used from data.
Definition TKDE.h:208
Double_t GetSigma() const
Definition TKDE.cxx:757
EKernelType fKernelType
Graph with the errors.
Definition TKDE.h:199
TF1 * fApproximateBias
Output Kernel Density Estimation lower confidence interval PDF function.
Definition TKDE.h:196
void SetSigma(Double_t R)
Definition TKDE.cxx:590
std::vector< Double_t > fEventWeights
Original data weights.
Definition TKDE.h:191
TF1 * GetApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition TKDE.cxx:708
TF1 * GetLowerFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition TKDE.cxx:703
Bool_t fUseBins
Definition TKDE.h:206
KernelFunction_Ptr fKernelFunction
! pointer to kernel function
Definition TKDE.h:185
EBinning
Data binning option.
Definition TKDE.h:73
@ kUnbinned
Definition TKDE.h:74
@ kRelaxedBinning
The algorithm is allowed to use binning if the data is large enough.
Definition TKDE.h:75
@ kForcedBinning
Definition TKDE.h:76
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
const char * GetTitle() const override
Returns title of object.
Definition TNamed.h:48
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition TObject.cxx:991
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition TObject.cxx:1005
virtual void Fatal(const char *method, const char *msgfmt,...) const
Issue fatal error message.
Definition TObject.cxx:1033
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition TObject.cxx:979
Basic string class.
Definition TString.h:139
void ToLower()
Change string to lower-case.
Definition TString.cxx:1182
void Clear()
Clear string without changing its capacity.
Definition TString.cxx:1235
const char * Data() const
Definition TString.h:376
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition TString.h:704
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition TString.cxx:2356
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:632
@ kGAUSS
simple Gauss integration method with fixed rule
double normal_quantile(double z, double sigma)
Inverse ( ) of the cumulative distribution function of the lower tail of the normal (Gaussian) distri...
const Double_t sigma
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
Double_t ey[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
TH1F * h1
Definition legend1.C:5
Bool_t IsNaN(Double_t x)
Definition TMath.h:892
Double_t QuietNaN()
Returns a quiet NaN as defined by IEEE 754.
Definition TMath.h:902
void Quantiles(Int_t n, Int_t nprob, Double_t *x, Double_t *quantiles, Double_t *prob, Bool_t isSorted=kTRUE, Int_t *index=nullptr, Int_t type=7)
Computes sample quantiles, corresponding to the given probabilities.
Definition TMath.cxx:1207
const TKDE * fKDE
Definition TKDE.cxx:55
KernelIntegrand(const TKDE *kde, EIntegralResult intRes)
Definition TKDE.cxx:1212
Double_t operator()(Double_t x) const
Definition TKDE.cxx:1216
EIntegralResult fIntegralResult
Definition TKDE.cxx:56