Logo ROOT   6.07/09
Reference Guide
PDEFoamDecisionTree.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Alexander Voigt
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Classes: PDEFoamDecisionTree *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation of decision tree like PDEFoam *
12  * *
13  * Authors (alphabetical): *
14  * Tancredi Carli - CERN, Switzerland *
15  * Dominik Dannheim - CERN, Switzerland *
16  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
17  * Alexander Voigt - TU Dresden, Germany *
18  * Peter Speckmayer - CERN, Switzerland *
19  * *
20  * Copyright (c) 2010: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 //_____________________________________________________________________
30 //
31 // PDEFoamDecisionTree
32 //
33 // This PDEFoam variant acts like a decision tree and stores in every
34 // cell the discriminant
35 //
36 // D = #events with given class / total number of events
37 //
38 // as well as the statistical error on the discriminant. It therefore
39 // acts as a discriminant estimator. The decision tree-like behaviour
40 // is achieved by overriding PDEFoamDiscriminant::Explore() to use a
41 // decision tree-like cell splitting algorithm (given a separation
42 // type).
43 //
44 // This PDEFoam variant should be booked together with the
45 // PDEFoamDecisionTreeDensity density estimator, which returns the
46 // events in a cell without sampling.
47 //
48 //_____________________________________________________________________
49 
50 #ifndef ROOT_TMVA_PDEFoamDecisionTree
52 #endif
53 #ifndef ROOT_TMVA_PDEFoamDecisionTreeDensity
55 #endif
56 
57 #include "TMVA/MsgLogger.h"
58 #include "TMVA/PDEFoamCell.h"
60 #include "TMVA/PDEFoamVect.h"
61 #include "TMVA/SeparationBase.h"
62 #include "TMVA/Types.h"
63 #include "TMVA/Volume.h"
64 
65 #include "Rtypes.h"
66 #include "TH1D.h"
67 
68 class TString;
69 
71 
72 ////////////////////////////////////////////////////////////////////////////////
73 /// Default constructor for streamer, user should not use it.
74 
77  , fSepType(NULL)
78 {
79 }
80 
81 ////////////////////////////////////////////////////////////////////////////////
82 /// Parameters:
83 ///
84 /// - name - name of the foam
85 ///
86 /// - sepType - separation type used for the cell splitting (will be
87 /// deleted in the destructor)
88 ///
89 /// - cls - class to consider as signal when calcualting the purity
90 
92  : PDEFoamDiscriminant(name, cls)
93  , fSepType(sepType)
94 {
95 }
96 
97 ////////////////////////////////////////////////////////////////////////////////
98 /// Copy Constructor NOT IMPLEMENTED (NEVER USED)
99 
101  : PDEFoamDiscriminant(from)
102  , fSepType(from.fSepType)
103 {
104  Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
105 }
106 
107 ////////////////////////////////////////////////////////////////////////////////
108 /// Destructor
109 /// deletes fSepType
110 
112 {
113  if (fSepType)
114  delete fSepType;
115 }
116 
117 ////////////////////////////////////////////////////////////////////////////////
118 /// Internal subprogram used by Create. It explores newly defined
119 /// cell with according to the decision tree logic. The separation
120 /// set via the 'sepType' option in the constructor.
121 ///
122 /// The optimal division point for eventual future cell division is
123 /// determined/recorded. Note that links to parents and initial
124 /// volume = 1/2 parent has to be already defined prior to calling
125 /// this routine.
126 ///
127 /// Note, that according to the decision tree logic, a cell is only
128 /// split, if the number of (unweighted) events in each dautghter
129 /// cell is greater than fNmin.
130 
132 {
133  if (!cell)
134  Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
135 
136  // create edge histograms
137  std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
138  hsig.reserve(fDim);
139  hbkg.reserve(fDim);
140  hsig_unw.reserve(fDim);
141  hbkg_unw.reserve(fDim);
142  for (Int_t idim = 0; idim < fDim; idim++) {
143  hsig.push_back(new TH1D(Form("hsig_%i", idim),
144  Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
145  hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
146  Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
147  hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
148  Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
149  hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
150  Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
151  }
152 
153  // get cell position and size
154  PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
155  cell->GetHcub(cellPosi, cellSize);
156 
157  // determine lower and upper cell bound
158  std::vector<Double_t> lb(GetTotDim()); // lower bound
159  std::vector<Double_t> ub(GetTotDim()); // upper bound
160  for (Int_t idim = 0; idim < GetTotDim(); idim++) {
161  lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
162  ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
163  }
164 
165  // fDistr must be of type PDEFoamDecisionTreeDensity*
167  if (distr == NULL)
168  Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
169  << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
170 
171  // create TMVA::Volume object needed for searching within the BST
172  TMVA::Volume volume(&lb, &ub);
173 
174  // fill the signal and background histograms for the given volume
175  distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
176 
177  // ------ determine the best division edge
178  Double_t xBest = 0.5; // best division point
179  Int_t kBest = -1; // best split dimension
180  Double_t maxGain = -1.0; // maximum gain
181  Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
182  Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
183  Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
184  Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
185 
186  for (Int_t idim = 0; idim < fDim; ++idim) {
187  Double_t nSelS = hsig.at(idim)->GetBinContent(0);
188  Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
189  Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
190  Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
191  for (Int_t jLo = 1; jLo < fNBin; jLo++) {
192  nSelS += hsig.at(idim)->GetBinContent(jLo);
193  nSelB += hbkg.at(idim)->GetBinContent(jLo);
194  nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
195  nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
196 
197  // proceed if total number of events in left and right cell
198  // is greater than fNmin
199  if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
200  (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
201  continue;
202 
203  Double_t xLo = 1.0 * jLo / fNBin;
204 
205  // calculate separation gain
206  Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
207 
208  if (gain >= maxGain) {
209  maxGain = gain;
210  xBest = xLo;
211  kBest = idim;
212  }
213  } // jLo
214  } // idim
215 
216  if (kBest >= fDim || kBest < 0) {
217  // No best division edge found! One must ensure, that this cell
218  // is not chosen for splitting in PeekMax(). But since in
219  // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
220  // should set maxGain to -1.0 (or even 0.0?) here.
221  maxGain = -1.0;
222  }
223 
224  // set cell properties
225  cell->SetBest(kBest);
226  cell->SetXdiv(xBest);
227  if (nTotB + nTotS > 0)
228  cell->SetIntg(nTotS / (nTotB + nTotS));
229  else
230  cell->SetIntg(0.0);
231  cell->SetDriv(maxGain);
232  cell->CalcVolume();
233 
234  // set cell element 0 (total number of events in cell) during
235  // build-up
236  if (GetNmin() > 0)
237  SetCellElement(cell, 0, nTotS + nTotB);
238 
239  // clean up
240  for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
241  for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
242  for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
243  for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
244 }
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
Int_t fDim
Definition: PDEFoam.h:106
MsgLogger & Log() const
Definition: PDEFoam.h:262
Double_t * fXmax
Definition: PDEFoam.h:129
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
void SetCellElement(PDEFoamCell *cell, UInt_t i, Double_t value)
Set cell element i of cell to value.
Definition: PDEFoam.cxx:1432
virtual void FillHistograms(TMVA::Volume &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &)
Fill the given histograms with signal and background events, which are found in the volume...
void SetBest(Int_t Best)
Definition: PDEFoamCell.h:85
UInt_t GetNmin()
Definition: PDEFoam.h:228
PDEFoamDensityBase * fDistr
Definition: PDEFoam.h:137
Int_t GetTotDim() const
Definition: PDEFoam.h:219
Int_t fNBin
Definition: PDEFoam.h:109
void SetXdiv(Double_t Xdiv)
Definition: PDEFoamCell.h:86
virtual void Explore(PDEFoamCell *Cell)
Internal subprogram used by Create.
PDEFoamDecisionTree()
Default constructor for streamer, user should not use it.
void CalcVolume()
Calculates volume of the cell using size params which are calculated.
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void SetDriv(Double_t Driv)
Definition: PDEFoamCell.h:95
Double_t * fXmin
Definition: PDEFoam.h:128
REAL epsilon
Definition: triangle.c:617
virtual Double_t GetSeparationGain(const Double_t &nSelS, const Double_t &nSelB, const Double_t &nTotS, const Double_t &nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
void SetIntg(Double_t Intg)
Definition: PDEFoamCell.h:94
Abstract ClassifierFactory template that handles arbitrary types.
#define NULL
Definition: Rtypes.h:82
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:301
Float_t VarTransformInvers(Int_t idim, Float_t x) const
Definition: PDEFoam.h:320
void GetHcub(PDEFoamVect &, PDEFoamVect &) const
Provides size and position of the cell These parameter are calculated by analyzing information in all...
char name[80]
Definition: TGX11.cxx:109
virtual ~PDEFoamDecisionTree()
Destructor deletes fSepType.