Logo ROOT  
Reference Guide
KDEKernel.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Asen Christov
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6 * Package: TMVA *
7 * Class : TMVA::KDEKernel *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * MPI-K Heidelberg, Germany *
19 * Freiburg U., Germany *
20 * *
21 * Redistribution and use in source and binary forms, with or without *
22 * modification, are permitted according to the terms listed in LICENSE *
23 * (http://tmva.sourceforge.net/LICENSE) *
24 **********************************************************************************/
25
26/*! \class TMVA::KDEKernel
27\ingroup TMVA
28
29KDE Kernel for "smoothing" the PDFs.
30
31*/
32
33#include "TH1.h"
34#include "TH1F.h"
35#include "TF1.h"
36
37#include "TMath.h"
38
39#include "TMVA/KDEKernel.h"
40#include "TMVA/MsgLogger.h"
41#include "TMVA/Types.h"
42
44
45////////////////////////////////////////////////////////////////////////////////
46/// constructor
47/// sanity check
48
49TMVA::KDEKernel::KDEKernel( EKernelIter kiter, const TH1 *hist, Float_t lower_edge, Float_t upper_edge,
50 EKernelBorder kborder, Float_t FineFactor )
51: fSigma( 1. ),
52 fIter ( kiter ),
53 fLowerEdge (lower_edge ),
54 fUpperEdge (upper_edge),
55 fFineFactor ( FineFactor ),
56 fKernel_integ ( 0 ),
57 fKDEborder ( kborder ),
58 fLogger( new MsgLogger("KDEKernel") )
59{
60 if (hist == NULL) {
61 Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
62 }
63
64 fHist = (TH1F*)hist->Clone();
65 fFirstIterHist = (TH1F*)hist->Clone();
66 fFirstIterHist->Reset(); // now it is empty but with the proper binning
67 fSigmaHist = (TH1F*)hist->Clone();
68 fSigmaHist->Reset(); // now fSigmaHist is empty but with the proper binning
69
70 fHiddenIteration=false;
71}
72
73////////////////////////////////////////////////////////////////////////////////
74/// destructor
75
77{
78 if (fHist != NULL) delete fHist;
79 if (fFirstIterHist != NULL) delete fFirstIterHist;
80 if (fSigmaHist != NULL) delete fSigmaHist;
81 if (fKernel_integ != NULL) delete fKernel_integ;
82 delete fLogger;
83}
84
85////////////////////////////////////////////////////////////////////////////////
86/// when using Gaussian as Kernel function this is faster way to calculate the integrals
87
89{
90 if ( (par[1]<=0) || (x[0]>x[1])) return -1.;
91
92 Float_t xs1=(x[0]-par[0])/par[1];
93 Float_t xs2=(x[1]-par[0])/par[1];
94
95 if (xs1==0) {
96 if (xs2==0) return 0.;
97 if (xs2>0 ) return 0.5*TMath::Erf(xs2);
98 }
99 if (xs2==0) return 0.5*TMath::Erf(TMath::Abs(xs1));
100 if (xs1>0) return 0.5*(TMath::Erf(xs2)-TMath::Erf(xs1));
101 if (xs1<0) {
102 if (xs2>0 ) return 0.5*(TMath::Erf(xs2)+TMath::Erf(TMath::Abs(xs1)));
103 else return 0.5*(TMath::Erf(TMath::Abs(xs1))-TMath::Erf(TMath::Abs(xs2)));
104 }
105 return -1.;
106}
107
108////////////////////////////////////////////////////////////////////////////////
109/// fIter == 1 ---> nonadaptive KDE
110/// fIter == 2 ---> adaptive KDE
111
113{
114 if (ktype == kGauss) {
115
116 // i.e. gauss kernel
117 //
118 // this is going to be done for both (nonadaptive KDE and adaptive KDE)
119 // for nonadaptive KDE this is the only = final thing to do
120 // for adaptive KDE this is going to be used in the first (hidden) iteration
121 fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4);
122 fSigma = ( TMath::Sqrt(2.0)
123 *TMath::Power(4./3., 0.2)
124 *fHist->GetRMS()
125 *TMath::Power(fHist->Integral(), -0.2) );
126 // this formula for sigma is valid for Gaussian Kernel function (nonadaptive KDE).
127 // formula found in:
128 // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
129 if (fSigma <= 0 ) {
130 Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
131 }
132 }
133
134 if (fIter == kAdaptiveKDE) {
135
136 // this is done only for adaptive KDE
137
138 // fill a temporary histo using nonadaptive KDE
139 // this histo is identical with the final output when using only nonadaptive KDE
140 fHiddenIteration=true;
141
142 Float_t histoLowEdge=fHist->GetBinLowEdge(1);
143 Float_t histoUpperEdge=fHist->GetBinLowEdge(fHist->GetNbinsX()+1);
144
145 for (Int_t i=1;i<fHist->GetNbinsX();i++) {
146 // loop over the bins of the original histo
147 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
148 // loop over the bins of the PDF histo and fill it
149 fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
150 this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
151 fFirstIterHist->GetBinLowEdge(j+1),
152 fHist->GetBinCenter(i),
153 i)
154 );
155 }
156 if (fKDEborder == 3) { // mirror the samples and fill them again
157 // in order to save time do the mirroring only for the first (the lower) 1/5 of the histo to the left;
158 // and the last (the higher) 1/5 of the histo to the right.
159 // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
160 if (i < fHist->GetNbinsX()/5 ) { // the first (the lower) 1/5 of the histo
161 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
162 // loop over the bins of the PDF histo and fill it
163 fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
164 this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
165 fFirstIterHist->GetBinLowEdge(j+1),
166 2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
167 i)
168 );
169 }
170 }
171 if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
172 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
173 // loop over the bins of the PDF histo and fill it
174 fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
175 this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
176 fFirstIterHist->GetBinLowEdge(j+1),
177 2*histoUpperEdge-fHist->GetBinCenter(i), // mirroring to the right
178 i)
179 );
180 }
181 }
182 }
183 }
184
185 fFirstIterHist->SetEntries(fHist->GetEntries()); //set the number of entries to be the same as the original histo
186
187 // do "function like" integration = sum of (bin_width*bin_content):
188 Float_t integ=0;
189 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++)
190 integ+=fFirstIterHist->GetBinContent(j)*fFirstIterHist->GetBinWidth(j);
191 fFirstIterHist->Scale(1./integ);
192
193 fHiddenIteration=false;
194
195 // OK, now we have the first iteration,
196 // next: calculate the Sigmas (Widths) for the second (adaptive) iteration
197 // based on the output of the first iteration
198 // these Sigmas will be stored in histo called fSigmaHist
199 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
200 // loop over the bins of the PDF histo and fill fSigmaHist
201 if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
202 Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
203 }
204
205 fSigmaHist->SetBinContent(j,fFineFactor*fSigma/TMath::Sqrt(fFirstIterHist->GetBinContent(j)));
206 }
207 }
208
209 if (fKernel_integ ==0 ) {
210 Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
211 }
212}
213
214////////////////////////////////////////////////////////////////////////////////
215/// calculates the integral of the Kernel
216
218{
219 if ((fIter == kNonadaptiveKDE) || fHiddenIteration )
220 fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
221 else if ((fIter == kAdaptiveKDE) && !fHiddenIteration )
222 fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
223
224 if ( fKDEborder == 2 ) { // renormalization of the kernel function
225 Float_t renormFactor=1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
226 return (renormFactor*fKernel_integ->Eval(lowr,highr));
227 }
228
229 // the RenormFactor takes care about the "border" effects, i.e. sets the
230 // integral to one inside the histogram borders
231 return (fKernel_integ->Eval(lowr,highr));
232}
233
Double_t GaussIntegral(Double_t *x, Double_t *par)
when using Gaussian as Kernel function this is faster way to calculate the integrals
Definition: KDEKernel.cxx:88
float Float_t
Definition: RtypesCore.h:57
double Double_t
Definition: RtypesCore.h:59
#define ClassImp(name)
Definition: Rtypes.h:375
1-Dim function class
Definition: TF1.h:213
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:574
void Reset(Option_t *option="") override
Reset.
Definition: TH1.cxx:9985
TH1 is the base class of all histogram classes in ROOT.
Definition: TH1.h:58
TObject * Clone(const char *newname="") const override
Make a complete copy of the underlying object.
Definition: TH1.cxx:2737
KDE Kernel for "smoothing" the PDFs.
Definition: KDEKernel.h:50
KDEKernel(EKernelIter kiter=kNonadaptiveKDE, const TH1 *hist=0, Float_t lower_edge=0., Float_t upper_edge=1., EKernelBorder kborder=kNoTreatment, Float_t FineFactor=1.)
constructor sanity check
Definition: KDEKernel.cxx:49
TH1F * fHist
copy of input histogram
Definition: KDEKernel.h:82
virtual ~KDEKernel(void)
destructor
Definition: KDEKernel.cxx:76
TH1F * fSigmaHist
contains the Sigmas Widths for adaptive KDE
Definition: KDEKernel.h:84
TH1F * fFirstIterHist
histogram to be filled in the hidden iteration
Definition: KDEKernel.h:83
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Definition: KDEKernel.cxx:112
MsgLogger & Log() const
Definition: KDEKernel.h:89
Bool_t fHiddenIteration
Defines if whats currently running is the.
Definition: KDEKernel.h:85
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition: KDEKernel.cxx:217
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:57
@ kFATAL
Definition: Types.h:61
Double_t x[n]
Definition: legend1.C:17
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:148
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:190
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition: TMath.h:753
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition: TMath.h:659
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Definition: TMath.h:718
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition: TMathBase.h:123