ROOT  6.06/09
Reference Guide
KDEKernel.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Asen Christov
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6  * Package: TMVA *
7  * Class : TMVA::KDEKernel *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * Freiburg U., Germany *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 #include "TH1.h"
27 #include "TH1F.h"
28 #include "TF1.h"
29 
30 // #if ROOT_VERSION_CODE >= 364802
31 // #ifndef ROOT_TMathBase
32 // #include "TMathBase.h"
33 // #endif
34 // #else
35 // #ifndef ROOT_TMath
36 #include "TMath.h"
37 // #endif
38 // #endif
39 
40 #include "TMVA/KDEKernel.h"
41 #include "TMVA/MsgLogger.h"
42 
44 
45 ////////////////////////////////////////////////////////////////////////////////
46 /// constructor
47 /// sanity check
48 
49 TMVA::KDEKernel::KDEKernel( EKernelIter kiter, const TH1 *hist, Float_t lower_edge, Float_t upper_edge,
50  EKernelBorder kborder, Float_t FineFactor )
51  : fSigma( 1. ),
52  fIter ( kiter ),
53  fLowerEdge (lower_edge ),
54  fUpperEdge (upper_edge),
55  fFineFactor ( FineFactor ),
56  fKernel_integ ( 0 ),
57  fKDEborder ( kborder ),
58  fLogger( new MsgLogger("KDEKernel") )
59 {
60  if (hist == NULL) {
61  Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
62  }
63 
64  fHist = (TH1F*)hist->Clone();
65  fFirstIterHist = (TH1F*)hist->Clone();
66  fFirstIterHist->Reset(); // now it is empty but with the proper binning
67  fSigmaHist = (TH1F*)hist->Clone();
68  fSigmaHist->Reset(); // now fSigmaHist is empty but with the proper binning
69 
70  fHiddenIteration=false;
71 }
72 
73 ////////////////////////////////////////////////////////////////////////////////
74 /// destructor
75 
77 {
78  if (fHist != NULL) delete fHist;
79  if (fFirstIterHist != NULL) delete fFirstIterHist;
80  if (fSigmaHist != NULL) delete fSigmaHist;
81  if (fKernel_integ != NULL) delete fKernel_integ;
82  delete fLogger;
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 /// when using Gaussian as Kernel function this is faster way to calculate the integrals
87 
89 {
90  if ( (par[1]<=0) || (x[0]>x[1])) return -1.;
91 
92  Float_t xs1=(x[0]-par[0])/par[1];
93  Float_t xs2=(x[1]-par[0])/par[1];
94 
95  if (xs1==0) {
96  if (xs2==0) return 0.;
97  if (xs2>0 ) return 0.5*TMath::Erf(xs2);
98  }
99  if (xs2==0) return 0.5*TMath::Erf(TMath::Abs(xs1));
100  if (xs1>0) return 0.5*(TMath::Erf(xs2)-TMath::Erf(xs1));
101  if (xs1<0) {
102  if (xs2>0 ) return 0.5*(TMath::Erf(xs2)+TMath::Erf(TMath::Abs(xs1)));
103  else return 0.5*(TMath::Erf(TMath::Abs(xs1))-TMath::Erf(TMath::Abs(xs2)));
104  }
105  return -1.;
106 }
107 
108 ////////////////////////////////////////////////////////////////////////////////
109 /// fIter == 1 ---> nonadaptive KDE
110 /// fIter == 2 ---> adaptive KDE
111 
113 {
114  if (ktype == kGauss) {
115 
116  // i.e. gauss kernel
117  //
118  // this is going to be done for both (nonadaptive KDE and adaptive KDE)
119  // for nonadaptive KDE this is the only = final thing to do
120  // for adaptive KDE this is going to be used in the first (hidden) iteration
121  fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4);
122  fSigma = ( TMath::Sqrt(2.0)
123  *TMath::Power(4./3., 0.2)
124  *fHist->GetRMS()
125  *TMath::Power(fHist->Integral(), -0.2) );
126  // this formula for sigma is valid for Gaussian Kernel function (nonadaptive KDE).
127  // formula found in:
128  // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
129  if (fSigma <= 0 ) {
130  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
131  }
132  }
133 
134  if (fIter == kAdaptiveKDE) {
135 
136  // this is done only for adaptive KDE
137 
138  // fill a temporary histo using nonadaptive KDE
139  // this histo is identical with the final output when using only nonadaptive KDE
140  fHiddenIteration=true;
141 
142  Float_t histoLowEdge=fHist->GetBinLowEdge(1);
143  Float_t histoUpperEdge=fHist->GetBinLowEdge(fHist->GetNbinsX()+1);
144 
145  for (Int_t i=1;i<fHist->GetNbinsX();i++) {
146  // loop over the bins of the original histo
147  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
148  // loop over the bins of the PDF histo and fill it
149  fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
150  this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
151  fFirstIterHist->GetBinLowEdge(j+1),
152  fHist->GetBinCenter(i),
153  i)
154  );
155  }
156  if (fKDEborder == 3) { // mirror the saples and fill them again
157  // in order to save time do the mirroring only for the first (the lowwer) 1/5 of the histo to the left;
158  // and the last (the higher) 1/5 of the histo to the right.
159  // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
160  if (i < fHist->GetNbinsX()/5 ) { // the first (the lowwer) 1/5 of the histo
161  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
162  // loop over the bins of the PDF histo and fill it
163  fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
164  this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
165  fFirstIterHist->GetBinLowEdge(j+1),
166  2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
167  i)
168  );
169  }
170  }
171  if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
172  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
173  // loop over the bins of the PDF histo and fill it
174  fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
175  this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
176  fFirstIterHist->GetBinLowEdge(j+1),
177  2*histoUpperEdge-fHist->GetBinCenter(i), // mirroring to the right
178  i)
179  );
180  }
181  }
182  }
183  }
184 
185  fFirstIterHist->SetEntries(fHist->GetEntries()); //set the number of entries to be the same as the original histo
186 
187  // do "function like" integration = sum of (bin_width*bin_content):
188  Float_t integ=0;
189  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++)
190  integ+=fFirstIterHist->GetBinContent(j)*fFirstIterHist->GetBinWidth(j);
191  fFirstIterHist->Scale(1./integ);
192 
193  fHiddenIteration=false;
194 
195  // OK, now we have the first iteration,
196  // next: calculate the Sigmas (Widths) for the second (adaptive) iteration
197  // based on the output of the first iteration
198  // these Sigmas will be stored in histo called fSigmaHist
199  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
200  // loop over the bins of the PDF histo and fill fSigmaHist
201  if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
202  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
203  }
204 
205  fSigmaHist->SetBinContent(j,fFineFactor*fSigma/TMath::Sqrt(fFirstIterHist->GetBinContent(j)));
206  }
207  }
208 
209  if (fKernel_integ ==0 ) {
210  Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
211  }
212 }
213 
214 ////////////////////////////////////////////////////////////////////////////////
215 /// calculates the integral of the Kernel
216 
218 {
219  if ((fIter == kNonadaptiveKDE) || fHiddenIteration )
220  fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
221  else if ((fIter == kAdaptiveKDE) && !fHiddenIteration )
222  fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
223 
224  if ( fKDEborder == 2 ) { // renormalization of the kernel function
225  Float_t renormFactor=1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
226  return (renormFactor*fKernel_integ->Eval(lowr,highr));
227  }
228 
229  // the RenormFactor takes care aboud the "border" effects, i.e. sets the
230  // integral to one inside the histogram borders
231  return (fKernel_integ->Eval(lowr,highr));
232 }
233 
double par[1]
Definition: unuranDistr.cxx:38
Double_t GaussIntegral(Double_t *x, Double_t *par)
when using Gaussian as Kernel function this is faster way to calculate the integrals ...
Definition: KDEKernel.cxx:88
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
ClassImp(TMVA::KDEKernel) TMVA
constructor sanity check
Definition: KDEKernel.cxx:43
float Float_t
Definition: RtypesCore.h:53
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:570
int Int_t
Definition: RtypesCore.h:41
TH1F * fHist
Definition: KDEKernel.h:84
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TObject.cxx:203
Double_t x[n]
Definition: legend1.C:17
TH1F * fFirstIterHist
Definition: KDEKernel.h:85
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:187
TH1F * fSigmaHist
Definition: KDEKernel.h:86
MsgLogger * fLogger
Definition: KDEKernel.h:90
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition: KDEKernel.cxx:217
double Double_t
Definition: RtypesCore.h:55
virtual ~KDEKernel(void)
destructor
Definition: KDEKernel.cxx:76
The TH1 histogram class.
Definition: TH1.h:80
Abstract ClassifierFactory template that handles arbitrary types.
1-Dim function class
Definition: TF1.h:149
#define NULL
Definition: Rtypes.h:82
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Definition: KDEKernel.cxx:112
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
Definition: math.cpp:60
TF1 * fKernel_integ
Definition: KDEKernel.h:82