Logo ROOT   6.10/09
Reference Guide
KDEKernel.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Asen Christov
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6  * Package: TMVA *
7  * Class : TMVA::KDEKernel *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * Freiburg U., Germany *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 /*! \class TMVA::KDEKernel
27 \ingroup TMVA
28 
29 KDE Kernel for "smoothing" the PDFs.
30 
31 */
32 
33 #include "TH1.h"
34 #include "TH1F.h"
35 #include "TF1.h"
36 
37 // #if ROOT_VERSION_CODE >= 364802
38 // #ifndef ROOT_TMathBase
39 // #include "TMathBase.h"
40 // #endif
41 // #else
42 // #ifndef ROOT_TMath
43 #include "TMath.h"
44 // #endif
45 // #endif
46 
47 #include "TMVA/KDEKernel.h"
48 #include "TMVA/MsgLogger.h"
49 #include "TMVA/Types.h"
50 
52 
53 ////////////////////////////////////////////////////////////////////////////////
54 /// constructor
55 /// sanity check
56 
57 TMVA::KDEKernel::KDEKernel( EKernelIter kiter, const TH1 *hist, Float_t lower_edge, Float_t upper_edge,
58  EKernelBorder kborder, Float_t FineFactor )
59 : fSigma( 1. ),
60  fIter ( kiter ),
61  fLowerEdge (lower_edge ),
62  fUpperEdge (upper_edge),
63  fFineFactor ( FineFactor ),
64  fKernel_integ ( 0 ),
65  fKDEborder ( kborder ),
66  fLogger( new MsgLogger("KDEKernel") )
67 {
68  if (hist == NULL) {
69  Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
70  }
71 
72  fHist = (TH1F*)hist->Clone();
73  fFirstIterHist = (TH1F*)hist->Clone();
74  fFirstIterHist->Reset(); // now it is empty but with the proper binning
75  fSigmaHist = (TH1F*)hist->Clone();
76  fSigmaHist->Reset(); // now fSigmaHist is empty but with the proper binning
77 
78  fHiddenIteration=false;
79 }
80 
81 ////////////////////////////////////////////////////////////////////////////////
82 /// destructor
83 
85 {
86  if (fHist != NULL) delete fHist;
87  if (fFirstIterHist != NULL) delete fFirstIterHist;
88  if (fSigmaHist != NULL) delete fSigmaHist;
89  if (fKernel_integ != NULL) delete fKernel_integ;
90  delete fLogger;
91 }
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 /// when using Gaussian as Kernel function this is faster way to calculate the integrals
95 
97 {
98  if ( (par[1]<=0) || (x[0]>x[1])) return -1.;
99 
100  Float_t xs1=(x[0]-par[0])/par[1];
101  Float_t xs2=(x[1]-par[0])/par[1];
102 
103  if (xs1==0) {
104  if (xs2==0) return 0.;
105  if (xs2>0 ) return 0.5*TMath::Erf(xs2);
106  }
107  if (xs2==0) return 0.5*TMath::Erf(TMath::Abs(xs1));
108  if (xs1>0) return 0.5*(TMath::Erf(xs2)-TMath::Erf(xs1));
109  if (xs1<0) {
110  if (xs2>0 ) return 0.5*(TMath::Erf(xs2)+TMath::Erf(TMath::Abs(xs1)));
111  else return 0.5*(TMath::Erf(TMath::Abs(xs1))-TMath::Erf(TMath::Abs(xs2)));
112  }
113  return -1.;
114 }
115 
116 ////////////////////////////////////////////////////////////////////////////////
117 /// fIter == 1 ---> nonadaptive KDE
118 /// fIter == 2 ---> adaptive KDE
119 
121 {
122  if (ktype == kGauss) {
123 
124  // i.e. gauss kernel
125  //
126  // this is going to be done for both (nonadaptive KDE and adaptive KDE)
127  // for nonadaptive KDE this is the only = final thing to do
128  // for adaptive KDE this is going to be used in the first (hidden) iteration
129  fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4);
130  fSigma = ( TMath::Sqrt(2.0)
131  *TMath::Power(4./3., 0.2)
132  *fHist->GetRMS()
133  *TMath::Power(fHist->Integral(), -0.2) );
134  // this formula for sigma is valid for Gaussian Kernel function (nonadaptive KDE).
135  // formula found in:
136  // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
137  if (fSigma <= 0 ) {
138  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
139  }
140  }
141 
142  if (fIter == kAdaptiveKDE) {
143 
144  // this is done only for adaptive KDE
145 
146  // fill a temporary histo using nonadaptive KDE
147  // this histo is identical with the final output when using only nonadaptive KDE
148  fHiddenIteration=true;
149 
150  Float_t histoLowEdge=fHist->GetBinLowEdge(1);
151  Float_t histoUpperEdge=fHist->GetBinLowEdge(fHist->GetNbinsX()+1);
152 
153  for (Int_t i=1;i<fHist->GetNbinsX();i++) {
154  // loop over the bins of the original histo
155  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
156  // loop over the bins of the PDF histo and fill it
160  fHist->GetBinCenter(i),
161  i)
162  );
163  }
164  if (fKDEborder == 3) { // mirror the samples and fill them again
165  // in order to save time do the mirroring only for the first (the lower) 1/5 of the histo to the left;
166  // and the last (the higher) 1/5 of the histo to the right.
167  // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
168  if (i < fHist->GetNbinsX()/5 ) { // the first (the lower) 1/5 of the histo
169  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
170  // loop over the bins of the PDF histo and fill it
174  2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
175  i)
176  );
177  }
178  }
179  if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
180  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
181  // loop over the bins of the PDF histo and fill it
185  2*histoUpperEdge-fHist->GetBinCenter(i), // mirroring to the right
186  i)
187  );
188  }
189  }
190  }
191  }
192 
193  fFirstIterHist->SetEntries(fHist->GetEntries()); //set the number of entries to be the same as the original histo
194 
195  // do "function like" integration = sum of (bin_width*bin_content):
196  Float_t integ=0;
197  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++)
199  fFirstIterHist->Scale(1./integ);
200 
201  fHiddenIteration=false;
202 
203  // OK, now we have the first iteration,
204  // next: calculate the Sigmas (Widths) for the second (adaptive) iteration
205  // based on the output of the first iteration
206  // these Sigmas will be stored in histo called fSigmaHist
207  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
208  // loop over the bins of the PDF histo and fill fSigmaHist
209  if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
210  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
211  }
212 
214  }
215  }
216 
217  if (fKernel_integ ==0 ) {
218  Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
219  }
220 }
221 
222 ////////////////////////////////////////////////////////////////////////////////
223 /// calculates the integral of the Kernel
224 
226 {
228  fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
229  else if ((fIter == kAdaptiveKDE) && !fHiddenIteration )
230  fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
231 
232  if ( fKDEborder == 2 ) { // renormalization of the kernel function
233  Float_t renormFactor=1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
234  return (renormFactor*fKernel_integ->Eval(lowr,highr));
235  }
236 
237  // the RenormFactor takes care about the "border" effects, i.e. sets the
238  // integral to one inside the histogram borders
239  return (fKernel_integ->Eval(lowr,highr));
240 }
241 
double par[1]
Definition: unuranDistr.cxx:38
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:5937
Double_t GaussIntegral(Double_t *x, Double_t *par)
when using Gaussian as Kernel function this is faster way to calculate the integrals ...
Definition: KDEKernel.cxx:96
virtual void SetParameters(const Double_t *params)
Definition: TF1.h:588
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8253
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:649
Float_t fLowerEdge
Definition: KDEKernel.h:77
float Float_t
Definition: RtypesCore.h:53
Float_t fUpperEdge
Definition: KDEKernel.h:78
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4639
virtual Double_t Integral(Option_t *option="") const
Return integral of bin contents.
Definition: TH1.cxx:7127
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
Definition: TH1.h:563
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:551
int Int_t
Definition: RtypesCore.h:41
MsgLogger & Log() const
Definition: KDEKernel.h:89
Double_t GetRMS(Int_t axis=1) const
Definition: TH1.h:296
TH1F * fHist
Definition: KDEKernel.h:82
Float_t fFineFactor
Definition: KDEKernel.h:79
#define NULL
Definition: RtypesCore.h:88
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8264
Short_t Abs(Short_t d)
Definition: TMathBase.h:108
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:628
Bool_t fHiddenIteration
Definition: KDEKernel.h:85
Double_t x[n]
Definition: legend1.C:17
KDE Kernel for "smoothing" the PDFs.
Definition: KDEKernel.h:50
EKernelIter fIter
Definition: KDEKernel.h:76
EKernelBorder fKDEborder
Definition: KDEKernel.h:81
TH1F * fFirstIterHist
Definition: KDEKernel.h:83
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:187
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8325
TH1F * fSigmaHist
Definition: KDEKernel.h:84
MsgLogger * fLogger
Definition: KDEKernel.h:88
Float_t fSigma
Definition: KDEKernel.h:75
virtual Double_t Eval(Double_t x, Double_t y=0, Double_t z=0, Double_t t=0) const
Evaluate this function.
Definition: TF1.cxx:1197
virtual Double_t GetBinWidth(Int_t bin) const
Return bin width for 1D histogram.
Definition: TH1.cxx:8275
#define ClassImp(name)
Definition: Rtypes.h:336
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition: KDEKernel.cxx:225
double Double_t
Definition: RtypesCore.h:55
virtual ~KDEKernel(void)
destructor
Definition: KDEKernel.cxx:84
The TH1 histogram class.
Definition: TH1.h:56
virtual Double_t GetEntries() const
Return the current number of entries.
Definition: TH1.cxx:4054
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TObject.cxx:151
Abstract ClassifierFactory template that handles arbitrary types.
1-Dim function class
Definition: TF1.h:150
virtual void SetEntries(Double_t n)
Definition: TH1.h:363
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Definition: KDEKernel.cxx:120
virtual Int_t GetNbinsX() const
Definition: TH1.h:277
Double_t Sqrt(Double_t x)
Definition: TMath.h:591
TF1 * fKernel_integ
Definition: KDEKernel.h:80