Logo ROOT   6.16/01
Reference Guide
KDEKernel.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Asen Christov
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6 * Package: TMVA *
7 * Class : TMVA::KDEKernel *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * MPI-K Heidelberg, Germany *
19 * Freiburg U., Germany *
20 * *
21 * Redistribution and use in source and binary forms, with or without *
22 * modification, are permitted according to the terms listed in LICENSE *
23 * (http://tmva.sourceforge.net/LICENSE) *
24 **********************************************************************************/
25
26/*! \class TMVA::KDEKernel
27\ingroup TMVA
28
29KDE Kernel for "smoothing" the PDFs.
30
31*/
32
33#include "TH1.h"
34#include "TH1F.h"
35#include "TF1.h"
36
37// #if ROOT_VERSION_CODE >= 364802
38// #ifndef ROOT_TMathBase
39// #include "TMathBase.h"
40// #endif
41// #else
42// #ifndef ROOT_TMath
43#include "TMath.h"
44// #endif
45// #endif
46
47#include "TMVA/KDEKernel.h"
48#include "TMVA/MsgLogger.h"
49#include "TMVA/Types.h"
50
52
53////////////////////////////////////////////////////////////////////////////////
54/// constructor
55/// sanity check
56
57TMVA::KDEKernel::KDEKernel( EKernelIter kiter, const TH1 *hist, Float_t lower_edge, Float_t upper_edge,
58 EKernelBorder kborder, Float_t FineFactor )
59: fSigma( 1. ),
60 fIter ( kiter ),
61 fLowerEdge (lower_edge ),
62 fUpperEdge (upper_edge),
63 fFineFactor ( FineFactor ),
64 fKernel_integ ( 0 ),
65 fKDEborder ( kborder ),
66 fLogger( new MsgLogger("KDEKernel") )
67{
68 if (hist == NULL) {
69 Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
70 }
71
72 fHist = (TH1F*)hist->Clone();
73 fFirstIterHist = (TH1F*)hist->Clone();
74 fFirstIterHist->Reset(); // now it is empty but with the proper binning
75 fSigmaHist = (TH1F*)hist->Clone();
76 fSigmaHist->Reset(); // now fSigmaHist is empty but with the proper binning
77
78 fHiddenIteration=false;
79}
80
81////////////////////////////////////////////////////////////////////////////////
82/// destructor
83
85{
86 if (fHist != NULL) delete fHist;
87 if (fFirstIterHist != NULL) delete fFirstIterHist;
88 if (fSigmaHist != NULL) delete fSigmaHist;
89 if (fKernel_integ != NULL) delete fKernel_integ;
90 delete fLogger;
91}
92
93////////////////////////////////////////////////////////////////////////////////
94/// when using Gaussian as Kernel function this is faster way to calculate the integrals
95
97{
98 if ( (par[1]<=0) || (x[0]>x[1])) return -1.;
99
100 Float_t xs1=(x[0]-par[0])/par[1];
101 Float_t xs2=(x[1]-par[0])/par[1];
102
103 if (xs1==0) {
104 if (xs2==0) return 0.;
105 if (xs2>0 ) return 0.5*TMath::Erf(xs2);
106 }
107 if (xs2==0) return 0.5*TMath::Erf(TMath::Abs(xs1));
108 if (xs1>0) return 0.5*(TMath::Erf(xs2)-TMath::Erf(xs1));
109 if (xs1<0) {
110 if (xs2>0 ) return 0.5*(TMath::Erf(xs2)+TMath::Erf(TMath::Abs(xs1)));
111 else return 0.5*(TMath::Erf(TMath::Abs(xs1))-TMath::Erf(TMath::Abs(xs2)));
112 }
113 return -1.;
114}
115
116////////////////////////////////////////////////////////////////////////////////
117/// fIter == 1 ---> nonadaptive KDE
118/// fIter == 2 ---> adaptive KDE
119
121{
122 if (ktype == kGauss) {
123
124 // i.e. gauss kernel
125 //
126 // this is going to be done for both (nonadaptive KDE and adaptive KDE)
127 // for nonadaptive KDE this is the only = final thing to do
128 // for adaptive KDE this is going to be used in the first (hidden) iteration
129 fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4);
130 fSigma = ( TMath::Sqrt(2.0)
131 *TMath::Power(4./3., 0.2)
132 *fHist->GetRMS()
133 *TMath::Power(fHist->Integral(), -0.2) );
134 // this formula for sigma is valid for Gaussian Kernel function (nonadaptive KDE).
135 // formula found in:
136 // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
137 if (fSigma <= 0 ) {
138 Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
139 }
140 }
141
142 if (fIter == kAdaptiveKDE) {
143
144 // this is done only for adaptive KDE
145
146 // fill a temporary histo using nonadaptive KDE
147 // this histo is identical with the final output when using only nonadaptive KDE
148 fHiddenIteration=true;
149
150 Float_t histoLowEdge=fHist->GetBinLowEdge(1);
151 Float_t histoUpperEdge=fHist->GetBinLowEdge(fHist->GetNbinsX()+1);
152
153 for (Int_t i=1;i<fHist->GetNbinsX();i++) {
154 // loop over the bins of the original histo
155 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
156 // loop over the bins of the PDF histo and fill it
157 fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
158 this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
159 fFirstIterHist->GetBinLowEdge(j+1),
160 fHist->GetBinCenter(i),
161 i)
162 );
163 }
164 if (fKDEborder == 3) { // mirror the samples and fill them again
165 // in order to save time do the mirroring only for the first (the lower) 1/5 of the histo to the left;
166 // and the last (the higher) 1/5 of the histo to the right.
167 // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
168 if (i < fHist->GetNbinsX()/5 ) { // the first (the lower) 1/5 of the histo
169 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
170 // loop over the bins of the PDF histo and fill it
171 fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
172 this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
173 fFirstIterHist->GetBinLowEdge(j+1),
174 2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
175 i)
176 );
177 }
178 }
179 if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
180 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
181 // loop over the bins of the PDF histo and fill it
182 fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
183 this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
184 fFirstIterHist->GetBinLowEdge(j+1),
185 2*histoUpperEdge-fHist->GetBinCenter(i), // mirroring to the right
186 i)
187 );
188 }
189 }
190 }
191 }
192
193 fFirstIterHist->SetEntries(fHist->GetEntries()); //set the number of entries to be the same as the original histo
194
195 // do "function like" integration = sum of (bin_width*bin_content):
196 Float_t integ=0;
197 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++)
198 integ+=fFirstIterHist->GetBinContent(j)*fFirstIterHist->GetBinWidth(j);
199 fFirstIterHist->Scale(1./integ);
200
201 fHiddenIteration=false;
202
203 // OK, now we have the first iteration,
204 // next: calculate the Sigmas (Widths) for the second (adaptive) iteration
205 // based on the output of the first iteration
206 // these Sigmas will be stored in histo called fSigmaHist
207 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
208 // loop over the bins of the PDF histo and fill fSigmaHist
209 if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
210 Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
211 }
212
213 fSigmaHist->SetBinContent(j,fFineFactor*fSigma/TMath::Sqrt(fFirstIterHist->GetBinContent(j)));
214 }
215 }
216
217 if (fKernel_integ ==0 ) {
218 Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
219 }
220}
221
222////////////////////////////////////////////////////////////////////////////////
223/// calculates the integral of the Kernel
224
226{
227 if ((fIter == kNonadaptiveKDE) || fHiddenIteration )
228 fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
229 else if ((fIter == kAdaptiveKDE) && !fHiddenIteration )
230 fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
231
232 if ( fKDEborder == 2 ) { // renormalization of the kernel function
233 Float_t renormFactor=1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
234 return (renormFactor*fKernel_integ->Eval(lowr,highr));
235 }
236
237 // the RenormFactor takes care about the "border" effects, i.e. sets the
238 // integral to one inside the histogram borders
239 return (fKernel_integ->Eval(lowr,highr));
240}
241
Double_t GaussIntegral(Double_t *x, Double_t *par)
when using Gaussian as Kernel function this is faster way to calculate the integrals
Definition: KDEKernel.cxx:96
int Int_t
Definition: RtypesCore.h:41
double Double_t
Definition: RtypesCore.h:55
float Float_t
Definition: RtypesCore.h:53
#define ClassImp(name)
Definition: Rtypes.h:363
1-Dim function class
Definition: TF1.h:211
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
virtual void Reset(Option_t *option="")
Reset.
Definition: TH1.cxx:9426
The TH1 histogram class.
Definition: TH1.h:56
TObject * Clone(const char *newname=0) const
Make a complete copy of the underlying object.
Definition: TH1.cxx:2657
KDE Kernel for "smoothing" the PDFs.
Definition: KDEKernel.h:50
KDEKernel(EKernelIter kiter=kNonadaptiveKDE, const TH1 *hist=0, Float_t lower_edge=0., Float_t upper_edge=1., EKernelBorder kborder=kNoTreatment, Float_t FineFactor=1.)
constructor sanity check
Definition: KDEKernel.cxx:57
TH1F * fHist
Definition: KDEKernel.h:82
virtual ~KDEKernel(void)
destructor
Definition: KDEKernel.cxx:84
TH1F * fSigmaHist
Definition: KDEKernel.h:84
TH1F * fFirstIterHist
Definition: KDEKernel.h:83
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Definition: KDEKernel.cxx:120
MsgLogger & Log() const
Definition: KDEKernel.h:89
Bool_t fHiddenIteration
Definition: KDEKernel.h:85
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition: KDEKernel.cxx:225
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Double_t x[n]
Definition: legend1.C:17
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:184
Double_t Log(Double_t x)
Definition: TMath.h:748
Double_t Sqrt(Double_t x)
Definition: TMath.h:679
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:723
Short_t Abs(Short_t d)
Definition: TMathBase.h:120