Logo ROOT   6.16/01
Reference Guide
TKDE.h
Go to the documentation of this file.
1// @(#)root/hist:$Id$
2// Authors: Bartolomeu Rabacal 07/2010
3/**********************************************************************
4 * *
5 * Copyright (c) 2006 , LCG ROOT MathLib Team *
6 * *
7 * *
8 **********************************************************************/
9// Header file for TKDE
10
11#ifndef ROOT_TKDE
12#define ROOT_TKDE
13
15
16#include "TNamed.h"
17
18#include "Math/Math.h"
19
20//#include "TF1.h"
21class TGraphErrors;
22class TF1;
23
24/*
25 Kernel Density Estimation class. The three main references are (1) "Scott DW, Multivariate Density Estimation.
26Theory, Practice and Visualization. New York: Wiley", (2) "Jann Ben - ETH Zurich, Switzerland -, Univariate kernel density estimation document for KDENS: Stata module for univariate kernel density estimation." and (3) "Hardle W, Muller M, Sperlich S, Werwatz A, Nonparametric and Semiparametric Models. Springer."
27 The algorithm is briefly described in (4) "Cranmer KS, Kernel Estimation in High-Energy
28Physics. Computer Physics Communications 136:198-207,2001" - e-Print Archive: hep ex/0011057.
29 A binned version is also implemented to address the performance issue due to its data size dependence.
30*/
31class TKDE : public TNamed {
32public:
33
34 enum EKernelType { // Kernel function type option
39 kUserDefined, // Internal use only for the class's template constructor
40 kTotalKernels // Internal use only for member initialization
41 };
42
43 enum EIteration { // KDE fitting option
45 kFixed
46 };
47
48 enum EMirror { // Data "mirroring" option to address the probability "spill out" boundary effect
58 };
59
60 enum EBinning{ // Data binning option
62 kRelaxedBinning, // The algorithm is allowed to use binning if the data is large enough
64 };
65
66 explicit TKDE(UInt_t events = 0, const Double_t* data = 0, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
67 "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
68 Instantiate( nullptr, events, data, nullptr, xMin, xMax, option, rho);
69 }
70
71 TKDE(UInt_t events, const Double_t* data, const Double_t* dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
72 "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
73 Instantiate( nullptr, events, data, dataWeight, xMin, xMax, option, rho);
74 }
75
76 template<class KernelFunction>
77 TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
78 Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, nullptr, xMin, xMax, option, rho);
79 }
80 template<class KernelFunction>
81 TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, const Double_t * dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
82 Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, dataWeight, xMin, xMax, option, rho);
83 }
84
85 virtual ~TKDE();
86
87 void Fill(Double_t data);
88 void Fill(Double_t data, Double_t weight);
89 void SetKernelType(EKernelType kern);
90 void SetIteration(EIteration iter);
91 void SetMirror(EMirror mir);
92 void SetBinning(EBinning);
93 void SetNBins(UInt_t nbins);
94 void SetUseBinsNEvents(UInt_t nEvents);
95 void SetTuneFactor(Double_t rho);
96 void SetRange(Double_t xMin, Double_t xMax); // By default computed from the data
97
98 virtual void Draw(const Option_t* option = "");
99
101 Double_t operator()(const Double_t* x, const Double_t* p=0) const; // Needed for creating TF1
102
103 Double_t GetValue(Double_t x) const { return (*this)(x); }
105
106 Double_t GetBias(Double_t x) const;
107 Double_t GetMean() const;
108 Double_t GetSigma() const;
109 Double_t GetRAMISE() const;
110
111 Double_t GetFixedWeight() const;
112
113 TF1* GetFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
114 TF1* GetUpperFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
115 TF1* GetLowerFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
116 TF1* GetApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
117 TGraphErrors * GetGraphWithErrors(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
118
119 // get the drawn object to chanage settings
120 // These objects are managed by TKDE and should not be deleted by the user
121 TF1 * GetDrawnFunction() { return fPDF;}
125
126 const Double_t * GetAdaptiveWeights() const;
127
128
129private:
130
131 TKDE(TKDE& kde); // Disallowed copy constructor
132 TKDE operator=(TKDE& kde); // Disallowed assign operator
133
136
137 class TKernel;
138 friend class TKernel;
139
141
142 std::vector<Double_t> fData; // Data events
143 std::vector<Double_t> fEvents; // Original data storage
144 std::vector<Double_t> fEventWeights; // Original data weights
145
146 TF1* fPDF; // Output Kernel Density Estimation PDF function
147 TF1* fUpperPDF; // Output Kernel Density Estimation upper confidence interval PDF function
148 TF1* fLowerPDF; // Output Kernel Density Estimation lower confidence interval PDF function
149 TF1* fApproximateBias; // Output Kernel Density Estimation approximate bias
150 TGraphErrors* fGraph; // Graph with the errors
151
156
159 Bool_t fNewData; // flag to control when new data are given
160 Bool_t fUseMinMaxFromData; // flag top control if min and max must be used from data
161
162 UInt_t fNBins; // Number of bins for binned data option
163 UInt_t fNEvents; // Data's number of events
164 Double_t fSumOfCounts; // Data sum of weights
165 UInt_t fUseBinsNEvents; // If the algorithm is allowed to use binning this is the minimum number of events to do so
166
167 Double_t fMean; // Data mean
168 Double_t fSigma; // Data std deviation
169 Double_t fSigmaRob; // Data std deviation (robust estimation)
170 Double_t fXMin; // Data minimum value
171 Double_t fXMax; // Data maximum value
172 Double_t fRho; // Adjustment factor for sigma
173 Double_t fAdaptiveBandwidthFactor; // Geometric mean of the kernel density estimation from the data for adaptive iteration
174
175 Double_t fWeightSize; // Caches the weight size
176
177 std::vector<Double_t> fCanonicalBandwidths;
178 std::vector<Double_t> fKernelSigmas2;
179
180 std::vector<Double_t> fBinCount; // Number of events per bin for binned data option
181
182 std::vector<Bool_t> fSettedOptions; // User input options flag
183
184 struct KernelIntegrand;
185 friend struct KernelIntegrand;
186
187 void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t* data, const Double_t* weight,
188 Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho);
189
191 // Returns the kernel evaluation at x
192 Double_t k2_PI_ROOT_INV = 0.398942280401432703; // (2 * M_PI)**-0.5
193 return (x > -9. && x < 9.) ? k2_PI_ROOT_INV * std::exp(-.5 * x * x) : 0.0;
194 }
196 return (x > -1. && x < 1.) ? 3. / 4. * (1. - x * x) : 0.0;
197 }
199 // Returns the kernel evaluation at x
200 return (x > -1. && x < 1.) ? 15. / 16. * (1. - x * x) * (1. - x * x) : 0.0;
201 }
203 // Returns the kernel evaluation at x
204 return (x > -1. && x < 1.) ? M_PI_4 * std::cos(M_PI_2 * x) : 0.0;
205 }
206 Double_t UpperConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
207 Double_t LowerConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
208 Double_t ApproximateBias(const Double_t* x, const Double_t* ) const { return GetBias(*x); }
214 void ComputeDataStats() ;
215
216 UInt_t Index(Double_t x) const;
217
219 void SetBinCountData();
220 void CheckKernelValidity();
222 void SetUserKernelSigma2();
224 void SetKernelSigmas2();
226 void SetUseBins();
227 void SetMirror();
228 void SetMean();
229 void SetSigma(Double_t R);
230 void SetKernel();
231 void SetKernelFunction(KernelFunction_Ptr kernfunc = 0);
232 void SetOptions(const Option_t* option, Double_t rho);
233 void CheckOptions(Bool_t isUserDefinedKernel = kFALSE);
234 void GetOptions(std::string optionType, std::string option);
235 void AssureOptions();
236 void SetData(const Double_t* data, const Double_t * weights);
237 void InitFromNewData();
238 void SetMirroredEvents();
239 void SetDrawOptions(const Option_t* option, TString& plotOpt, TString& drawOpt);
240 void DrawErrors(TString& drawOpt);
241 void DrawConfidenceInterval(TString& drawOpt, double cl=0.95);
242
243 TF1* GetKDEFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
244 TF1* GetKDEApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
245 // The density to estimate should be at least twice differentiable.
246 TF1* GetPDFUpperConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
247 TF1* GetPDFLowerConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
248
249 ClassDef(TKDE, 2) // One dimensional semi-parametric Kernel Density Estimation
250
251};
252
253#endif
#define M_PI_2
Definition: Math.h:42
#define M_PI_4
Definition: Math.h:46
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
char Char_t
Definition: RtypesCore.h:29
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
const char Option_t
Definition: RtypesCore.h:62
#define ClassDef(name, id)
Definition: Rtypes.h:324
float xmin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
double cos(double)
double exp(double)
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Definition: IFunction.h:135
Template class to wrap any C++ callable object which takes one argument i.e.
1-Dim function class
Definition: TF1.h:211
A TGraphErrors is a TGraph with error bars.
Definition: TGraphErrors.h:26
Kernel Density Estimation class.
Definition: TKDE.h:31
TF1 * GetPDFUpperConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1142
TF1 * GetKDEApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1168
void SetData(const Double_t *data, const Double_t *weights)
Definition: TKDE.cxx:446
TF1 * fLowerPDF
Definition: TKDE.h:148
virtual ~TKDE()
Definition: TKDE.cxx:105
std::vector< Double_t > fKernelSigmas2
Definition: TKDE.h:178
Double_t ComputeKernelL2Norm() const
Definition: TKDE.cxx:1023
TF1 * fPDF
Definition: TKDE.h:146
TF1 * GetPDFLowerConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1155
void SetKernelType(EKernelType kern)
Definition: TKDE.cxx:329
std::vector< Double_t > fCanonicalBandwidths
Definition: TKDE.h:177
void SetKernelFunction(KernelFunction_Ptr kernfunc=0)
Definition: TKDE.cxx:558
UInt_t fNEvents
Definition: TKDE.h:163
void ComputeDataStats()
Definition: TKDE.cxx:1059
Double_t fXMax
Definition: TKDE.h:171
Double_t UpperConfidenceInterval(const Double_t *x, const Double_t *p) const
Definition: TKDE.cxx:962
friend class TKernel
Definition: TKDE.h:137
TF1 * GetDrawnUpperFunction()
Definition: TKDE.h:122
Double_t ApproximateBias(const Double_t *x, const Double_t *) const
Definition: TKDE.h:208
Double_t ComputeMidspread()
Definition: TKDE.cxx:1088
Bool_t fNewData
Definition: TKDE.h:159
void SetMirror()
Definition: TKDE.cxx:437
Bool_t fUseMirroring
Definition: TKDE.h:157
void DrawConfidenceInterval(TString &drawOpt, double cl=0.95)
Definition: TKDE.cxx:862
TF1 * GetDrawnLowerFunction()
Definition: TKDE.h:123
void SetMirroredEvents()
Definition: TKDE.cxx:500
EMirror fMirror
Definition: TKDE.h:154
void SetUserCanonicalBandwidth()
Definition: TKDE.cxx:1099
Bool_t fMirrorLeft
Definition: TKDE.h:157
EIteration fIteration
Definition: TKDE.h:153
void CheckKernelValidity()
Definition: TKDE.cxx:999
TKDE(const Char_t *, const KernelFunction &kernfunc, UInt_t events, const Double_t *data, const Double_t *dataWeight, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:81
const Double_t * GetAdaptiveWeights() const
Definition: TKDE.cxx:888
Double_t fAdaptiveBandwidthFactor
Definition: TKDE.h:173
TF1 * GetDrawnFunction()
Definition: TKDE.h:121
Double_t LowerConfidenceInterval(const Double_t *x, const Double_t *p) const
Definition: TKDE.cxx:971
Double_t fSigmaRob
Definition: TKDE.h:169
std::vector< Double_t > fBinCount
Definition: TKDE.h:180
EBinning fBinning
Definition: TKDE.h:155
EIteration
Definition: TKDE.h:43
@ kAdaptive
Definition: TKDE.h:44
@ kFixed
Definition: TKDE.h:45
Double_t GetRAMISE() const
Definition: TKDE.cxx:689
void SetIteration(EIteration iter)
Definition: TKDE.cxx:340
Double_t ComputeKernelIntegral() const
Definition: TKDE.cxx:1050
Double_t CosineArchKernel(Double_t x) const
Definition: TKDE.h:202
Double_t fXMin
Definition: TKDE.h:170
Double_t operator()(Double_t x) const
Definition: TKDE.cxx:671
void SetUserKernelSigma2()
Definition: TKDE.cxx:1104
Double_t GetBias(Double_t x) const
Definition: TKDE.cxx:981
std::vector< Double_t > fData
Definition: TKDE.h:142
Double_t fSumOfCounts
Definition: TKDE.h:164
UInt_t fUseBinsNEvents
Definition: TKDE.h:165
void SetKernel()
Definition: TKDE.cxx:544
Double_t fSigma
Definition: TKDE.h:168
TGraphErrors * GetGraphWithErrors(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:838
Double_t GetMean() const
Definition: TKDE.cxx:677
Double_t fRho
Definition: TKDE.h:172
ROOT::Math::IBaseFunctionOneDim * KernelFunction_Ptr
Definition: TKDE.h:134
Bool_t fAsymRight
Definition: TKDE.h:157
Double_t fWeightSize
Definition: TKDE.h:175
TKernel * fKernel
Definition: TKDE.h:140
void SetUseBinsNEvents(UInt_t nEvents)
Definition: TKDE.cxx:387
std::vector< Double_t > fEvents
Definition: TKDE.h:143
Double_t GetError(Double_t x) const
Definition: TKDE.cxx:990
TF1 * GetKDEFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1128
void SetBinning(EBinning)
Definition: TKDE.cxx:360
void GetOptions(std::string optionType, std::string option)
Definition: TKDE.cxx:223
Double_t GetValue(Double_t x) const
Definition: TKDE.h:103
Bool_t fAsymLeft
Definition: TKDE.h:157
std::vector< Bool_t > fSettedOptions
Definition: TKDE.h:182
Double_t GaussianKernel(Double_t x) const
Definition: TKDE.h:190
void SetRange(Double_t xMin, Double_t xMax)
Definition: TKDE.cxx:404
void SetMean()
Definition: TKDE.cxx:533
virtual void Draw(const Option_t *option="")
Definition: TKDE.cxx:780
Double_t ComputeKernelSigma2() const
Definition: TKDE.cxx:1032
void SetOptions(const Option_t *option, Double_t rho)
Definition: TKDE.cxx:153
void SetUseBins()
Definition: TKDE.cxx:418
Double_t GetFixedWeight() const
Definition: TKDE.cxx:877
void AssureOptions()
Definition: TKDE.cxx:290
TF1 * GetFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:619
void SetBinCountData()
Definition: TKDE.cxx:742
TF1 * GetUpperFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:628
void InitFromNewData()
Definition: TKDE.cxx:480
void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t *data, const Double_t *weight, Double_t xMin, Double_t xMax, const Option_t *option, Double_t rho)
Definition: TKDE.cxx:116
void SetDrawOptions(const Option_t *option, TString &plotOpt, TString &drawOpt)
Definition: TKDE.cxx:179
EMirror
Definition: TKDE.h:48
@ kMirrorLeft
Definition: TKDE.h:50
@ kMirrorAsymLeftRight
Definition: TKDE.h:54
@ kMirrorAsymRight
Definition: TKDE.h:55
@ kMirrorAsymBoth
Definition: TKDE.h:57
@ kNoMirror
Definition: TKDE.h:49
@ kMirrorRight
Definition: TKDE.h:51
@ kMirrorLeftAsymRight
Definition: TKDE.h:56
@ kMirrorBoth
Definition: TKDE.h:52
@ kMirrorAsymLeft
Definition: TKDE.h:53
TGraphErrors * GetDrawnGraph()
Definition: TKDE.h:124
TKDE(TKDE &kde)
TGraphErrors * fGraph
Definition: TKDE.h:150
void SetCanonicalBandwidths()
Definition: TKDE.cxx:602
void SetBinCentreData(Double_t xmin, Double_t xmax)
Definition: TKDE.cxx:733
TKDE(const Char_t *, const KernelFunction &kernfunc, UInt_t events, const Double_t *data, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:77
void SetTuneFactor(Double_t rho)
Definition: TKDE.cxx:394
UInt_t Index(Double_t x) const
Definition: TKDE.cxx:947
void Fill(Double_t data)
Definition: TKDE.cxx:643
TF1 * fUpperPDF
Definition: TKDE.h:147
Bool_t fMirrorRight
Definition: TKDE.h:157
UInt_t fNBins
Definition: TKDE.h:162
Double_t ComputeKernelMu() const
Definition: TKDE.cxx:1041
EKernelType
Definition: TKDE.h:34
@ kGaussian
Definition: TKDE.h:35
@ kEpanechnikov
Definition: TKDE.h:36
@ kCosineArch
Definition: TKDE.h:38
@ kBiweight
Definition: TKDE.h:37
@ kTotalKernels
Definition: TKDE.h:40
@ kUserDefined
Definition: TKDE.h:39
Double_t fMean
Definition: TKDE.h:167
void DrawErrors(TString &drawOpt)
Definition: TKDE.cxx:831
void SetNBins(UInt_t nbins)
Definition: TKDE.cxx:368
void CheckOptions(Bool_t isUserDefinedKernel=kFALSE)
Definition: TKDE.cxx:306
Double_t EpanechnikovKernel(Double_t x) const
Definition: TKDE.h:195
Double_t BiweightKernel(Double_t x) const
Definition: TKDE.h:198
void SetKernelSigmas2()
Definition: TKDE.cxx:611
Bool_t fUseMinMaxFromData
Definition: TKDE.h:160
Double_t GetSigma() const
Definition: TKDE.cxx:683
EKernelType fKernelType
Definition: TKDE.h:152
TF1 * fApproximateBias
Definition: TKDE.h:149
TKDE operator=(TKDE &kde)
void SetSigma(Double_t R)
Definition: TKDE.cxx:538
std::vector< Double_t > fEventWeights
Definition: TKDE.h:144
TF1 * GetApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:638
TF1 * GetLowerFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:633
Bool_t fUseBins
Definition: TKDE.h:158
TKDE(UInt_t events=0, const Double_t *data=0, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:66
TKDE(UInt_t events, const Double_t *data, const Double_t *dataWeight, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:71
KernelFunction_Ptr fKernelFunction
Definition: TKDE.h:135
friend struct KernelIntegrand
Definition: TKDE.h:184
EBinning
Definition: TKDE.h:60
@ kUnbinned
Definition: TKDE.h:61
@ kRelaxedBinning
Definition: TKDE.h:62
@ kForcedBinning
Definition: TKDE.h:63
void SetHistogram()
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:29
Basic string class.
Definition: TString.h:131
Double_t x[n]
Definition: legend1.C:17