Logo ROOT   6.08/07
Reference Guide
KDEKernel.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Asen Christov
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6  * Package: TMVA *
7  * Class : TMVA::KDEKernel *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * Freiburg U., Germany *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 #include "TH1.h"
27 #include "TH1F.h"
28 #include "TF1.h"
29 
30 // #if ROOT_VERSION_CODE >= 364802
31 // #ifndef ROOT_TMathBase
32 // #include "TMathBase.h"
33 // #endif
34 // #else
35 // #ifndef ROOT_TMath
36 #include "TMath.h"
37 // #endif
38 // #endif
39 
40 #include "TMVA/KDEKernel.h"
41 #include "TMVA/MsgLogger.h"
42 #include "TMVA/Types.h"
43 
45 
46 ////////////////////////////////////////////////////////////////////////////////
47 /// constructor
48 /// sanity check
49 
50 TMVA::KDEKernel::KDEKernel( EKernelIter kiter, const TH1 *hist, Float_t lower_edge, Float_t upper_edge,
51  EKernelBorder kborder, Float_t FineFactor )
52 : fSigma( 1. ),
53  fIter ( kiter ),
54  fLowerEdge (lower_edge ),
55  fUpperEdge (upper_edge),
56  fFineFactor ( FineFactor ),
57  fKernel_integ ( 0 ),
58  fKDEborder ( kborder ),
59  fLogger( new MsgLogger("KDEKernel") )
60 {
61  if (hist == NULL) {
62  Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
63  }
64 
65  fHist = (TH1F*)hist->Clone();
66  fFirstIterHist = (TH1F*)hist->Clone();
67  fFirstIterHist->Reset(); // now it is empty but with the proper binning
68  fSigmaHist = (TH1F*)hist->Clone();
69  fSigmaHist->Reset(); // now fSigmaHist is empty but with the proper binning
70 
71  fHiddenIteration=false;
72 }
73 
74 ////////////////////////////////////////////////////////////////////////////////
75 /// destructor
76 
78 {
79  if (fHist != NULL) delete fHist;
80  if (fFirstIterHist != NULL) delete fFirstIterHist;
81  if (fSigmaHist != NULL) delete fSigmaHist;
82  if (fKernel_integ != NULL) delete fKernel_integ;
83  delete fLogger;
84 }
85 
86 ////////////////////////////////////////////////////////////////////////////////
87 /// when using Gaussian as Kernel function this is faster way to calculate the integrals
88 
90 {
91  if ( (par[1]<=0) || (x[0]>x[1])) return -1.;
92 
93  Float_t xs1=(x[0]-par[0])/par[1];
94  Float_t xs2=(x[1]-par[0])/par[1];
95 
96  if (xs1==0) {
97  if (xs2==0) return 0.;
98  if (xs2>0 ) return 0.5*TMath::Erf(xs2);
99  }
100  if (xs2==0) return 0.5*TMath::Erf(TMath::Abs(xs1));
101  if (xs1>0) return 0.5*(TMath::Erf(xs2)-TMath::Erf(xs1));
102  if (xs1<0) {
103  if (xs2>0 ) return 0.5*(TMath::Erf(xs2)+TMath::Erf(TMath::Abs(xs1)));
104  else return 0.5*(TMath::Erf(TMath::Abs(xs1))-TMath::Erf(TMath::Abs(xs2)));
105  }
106  return -1.;
107 }
108 
109 ////////////////////////////////////////////////////////////////////////////////
110 /// fIter == 1 ---> nonadaptive KDE
111 /// fIter == 2 ---> adaptive KDE
112 
114 {
115  if (ktype == kGauss) {
116 
117  // i.e. gauss kernel
118  //
119  // this is going to be done for both (nonadaptive KDE and adaptive KDE)
120  // for nonadaptive KDE this is the only = final thing to do
121  // for adaptive KDE this is going to be used in the first (hidden) iteration
122  fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4);
123  fSigma = ( TMath::Sqrt(2.0)
124  *TMath::Power(4./3., 0.2)
125  *fHist->GetRMS()
126  *TMath::Power(fHist->Integral(), -0.2) );
127  // this formula for sigma is valid for Gaussian Kernel function (nonadaptive KDE).
128  // formula found in:
129  // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
130  if (fSigma <= 0 ) {
131  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
132  }
133  }
134 
135  if (fIter == kAdaptiveKDE) {
136 
137  // this is done only for adaptive KDE
138 
139  // fill a temporary histo using nonadaptive KDE
140  // this histo is identical with the final output when using only nonadaptive KDE
141  fHiddenIteration=true;
142 
143  Float_t histoLowEdge=fHist->GetBinLowEdge(1);
144  Float_t histoUpperEdge=fHist->GetBinLowEdge(fHist->GetNbinsX()+1);
145 
146  for (Int_t i=1;i<fHist->GetNbinsX();i++) {
147  // loop over the bins of the original histo
148  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
149  // loop over the bins of the PDF histo and fill it
153  fHist->GetBinCenter(i),
154  i)
155  );
156  }
157  if (fKDEborder == 3) { // mirror the saples and fill them again
158  // in order to save time do the mirroring only for the first (the lowwer) 1/5 of the histo to the left;
159  // and the last (the higher) 1/5 of the histo to the right.
160  // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
161  if (i < fHist->GetNbinsX()/5 ) { // the first (the lowwer) 1/5 of the histo
162  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
163  // loop over the bins of the PDF histo and fill it
167  2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
168  i)
169  );
170  }
171  }
172  if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
173  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
174  // loop over the bins of the PDF histo and fill it
178  2*histoUpperEdge-fHist->GetBinCenter(i), // mirroring to the right
179  i)
180  );
181  }
182  }
183  }
184  }
185 
186  fFirstIterHist->SetEntries(fHist->GetEntries()); //set the number of entries to be the same as the original histo
187 
188  // do "function like" integration = sum of (bin_width*bin_content):
189  Float_t integ=0;
190  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++)
192  fFirstIterHist->Scale(1./integ);
193 
194  fHiddenIteration=false;
195 
196  // OK, now we have the first iteration,
197  // next: calculate the Sigmas (Widths) for the second (adaptive) iteration
198  // based on the output of the first iteration
199  // these Sigmas will be stored in histo called fSigmaHist
200  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
201  // loop over the bins of the PDF histo and fill fSigmaHist
202  if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
203  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
204  }
205 
207  }
208  }
209 
210  if (fKernel_integ ==0 ) {
211  Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
212  }
213 }
214 
215 ////////////////////////////////////////////////////////////////////////////////
216 /// calculates the integral of the Kernel
217 
219 {
220  if ((fIter == kNonadaptiveKDE) || fHiddenIteration )
221  fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
222  else if ((fIter == kAdaptiveKDE) && !fHiddenIteration )
223  fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
224 
225  if ( fKDEborder == 2 ) { // renormalization of the kernel function
226  Float_t renormFactor=1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
227  return (renormFactor*fKernel_integ->Eval(lowr,highr));
228  }
229 
230  // the RenormFactor takes care aboud the "border" effects, i.e. sets the
231  // integral to one inside the histogram borders
232  return (fKernel_integ->Eval(lowr,highr));
233 }
234 
double par[1]
Definition: unuranDistr.cxx:38
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:5936
Double_t GaussIntegral(Double_t *x, Double_t *par)
when using Gaussian as Kernel function this is faster way to calculate the integrals ...
Definition: KDEKernel.cxx:89
virtual void SetParameters(const Double_t *params)
Definition: TF1.h:439
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8251
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
Double_t Log(Double_t x)
Definition: TMath.h:526
Float_t fLowerEdge
Definition: KDEKernel.h:79
float Float_t
Definition: RtypesCore.h:53
Float_t fUpperEdge
Definition: KDEKernel.h:80
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4638
virtual Double_t Integral(Option_t *option="") const
Return integral of bin contents.
Definition: TH1.cxx:7124
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
Definition: TH1.h:587
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
int Int_t
Definition: RtypesCore.h:41
MsgLogger & Log() const
Definition: KDEKernel.h:91
Double_t GetRMS(Int_t axis=1) const
Definition: TH1.h:320
TH1F * fHist
Definition: KDEKernel.h:84
Float_t fFineFactor
Definition: KDEKernel.h:81
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8262
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
Bool_t fHiddenIteration
Definition: KDEKernel.h:87
Double_t x[n]
Definition: legend1.C:17
EKernelIter fIter
Definition: KDEKernel.h:78
EKernelBorder fKDEborder
Definition: KDEKernel.h:83
TH1F * fFirstIterHist
Definition: KDEKernel.h:85
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:187
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8323
TH1F * fSigmaHist
Definition: KDEKernel.h:86
MsgLogger * fLogger
Definition: KDEKernel.h:90
Float_t fSigma
Definition: KDEKernel.h:77
virtual Double_t Eval(Double_t x, Double_t y=0, Double_t z=0, Double_t t=0) const
Evaluate this function.
Definition: TF1.cxx:1196
virtual Double_t GetBinWidth(Int_t bin) const
Return bin width for 1D histogram.
Definition: TH1.cxx:8273
#define ClassImp(name)
Definition: Rtypes.h:279
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition: KDEKernel.cxx:218
double Double_t
Definition: RtypesCore.h:55
virtual ~KDEKernel(void)
destructor
Definition: KDEKernel.cxx:77
The TH1 histogram class.
Definition: TH1.h:80
virtual Double_t GetEntries() const
Return the current number of entries.
Definition: TH1.cxx:4053
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TObject.cxx:204
Abstract ClassifierFactory template that handles arbitrary types.
1-Dim function class
Definition: TF1.h:149
#define NULL
Definition: Rtypes.h:82
virtual void SetEntries(Double_t n)
Definition: TH1.h:387
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Definition: KDEKernel.cxx:113
virtual Int_t GetNbinsX() const
Definition: TH1.h:301
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
TF1 * fKernel_integ
Definition: KDEKernel.h:82