Logo ROOT   6.10/09
Reference Guide
PDEFoamDecisionTree.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Alexander Voigt
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Classes: PDEFoamDecisionTree *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation of decision tree like PDEFoam *
12  * *
13  * Authors (alphabetical): *
14  * Tancredi Carli - CERN, Switzerland *
15  * Dominik Dannheim - CERN, Switzerland *
16  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
17  * Alexander Voigt - TU Dresden, Germany *
18  * Peter Speckmayer - CERN, Switzerland *
19  * *
20  * Copyright (c) 2010: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 /*! \class TMVA::PDEFoamDecisionTree
30 \ingroup TMVA
31 
32 This PDEFoam variant acts like a decision tree and stores in every
33 cell the discriminant
34 
35  D = #events with given class / total number of events
36 
37 as well as the statistical error on the discriminant. It therefore
38 acts as a discriminant estimator. The decision tree-like behaviour
39 is achieved by overriding PDEFoamDiscriminant::Explore() to use a
40 decision tree-like cell splitting algorithm (given a separation
41 type).
42 
43 This PDEFoam variant should be booked together with the
44 PDEFoamDecisionTreeDensity density estimator, which returns the
45 events in a cell without sampling.
46 */
47 
50 
51 #include "TMVA/MsgLogger.h"
52 #include "TMVA/PDEFoamCell.h"
54 #include "TMVA/PDEFoamVect.h"
55 #include "TMVA/SeparationBase.h"
56 #include "TMVA/Types.h"
57 #include "TMVA/Volume.h"
58 
59 #include "Rtypes.h"
60 #include "TH1D.h"
61 
62 class TString;
63 
65 
66 ////////////////////////////////////////////////////////////////////////////////
67 /// Default constructor for streamer, user should not use it.
68 
71  , fSepType(NULL)
72 {
73 }
74 
75 ////////////////////////////////////////////////////////////////////////////////
76 /// Parameters:
77 ///
78 /// - name - name of the foam
79 ///
80 /// - sepType - separation type used for the cell splitting (will be
81 /// deleted in the destructor)
82 ///
83 /// - cls - class to consider as signal when calculating the purity
84 
86  : PDEFoamDiscriminant(name, cls)
87  , fSepType(sepType)
88 {
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// Copy Constructor NOT IMPLEMENTED (NEVER USED)
93 
95  : PDEFoamDiscriminant(from)
96  , fSepType(from.fSepType)
97 {
98  Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
99 }
100 
101 ////////////////////////////////////////////////////////////////////////////////
102 /// Destructor
103 /// deletes fSepType
104 
106 {
107  if (fSepType)
108  delete fSepType;
109 }
110 
111 ////////////////////////////////////////////////////////////////////////////////
112 /// Internal subprogram used by Create. It explores newly defined
113 /// cell with according to the decision tree logic. The separation
114 /// set via the 'sepType' option in the constructor.
115 ///
116 /// The optimal division point for eventual future cell division is
117 /// determined/recorded. Note that links to parents and initial
118 /// volume = 1/2 parent has to be already defined prior to calling
119 /// this routine.
120 ///
121 /// Note, that according to the decision tree logic, a cell is only
122 /// split, if the number of (unweighted) events in each daughter
123 /// cell is greater than fNmin.
124 
126 {
127  if (!cell)
128  Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
129 
130  // create edge histograms
131  std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
132  hsig.reserve(fDim);
133  hbkg.reserve(fDim);
134  hsig_unw.reserve(fDim);
135  hbkg_unw.reserve(fDim);
136  for (Int_t idim = 0; idim < fDim; idim++) {
137  hsig.push_back(new TH1D(Form("hsig_%i", idim),
138  Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
139  hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
140  Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
141  hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
142  Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
143  hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
144  Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
145  }
146 
147  // get cell position and size
148  PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
149  cell->GetHcub(cellPosi, cellSize);
150 
151  // determine lower and upper cell bound
152  std::vector<Double_t> lb(GetTotDim()); // lower bound
153  std::vector<Double_t> ub(GetTotDim()); // upper bound
154  for (Int_t idim = 0; idim < GetTotDim(); idim++) {
155  lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
156  ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
157  }
158 
159  // fDistr must be of type PDEFoamDecisionTreeDensity*
161  if (distr == NULL)
162  Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
163  << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
164 
165  // create TMVA::Volume object needed for searching within the BST
166  TMVA::Volume volume(&lb, &ub);
167 
168  // fill the signal and background histograms for the given volume
169  distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
170 
171  // ------ determine the best division edge
172  Double_t xBest = 0.5; // best division point
173  Int_t kBest = -1; // best split dimension
174  Double_t maxGain = -1.0; // maximum gain
175  Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
176  Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
177  Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
178  Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
179 
180  for (Int_t idim = 0; idim < fDim; ++idim) {
181  Double_t nSelS = hsig.at(idim)->GetBinContent(0);
182  Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
183  Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
184  Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
185  for (Int_t jLo = 1; jLo < fNBin; jLo++) {
186  nSelS += hsig.at(idim)->GetBinContent(jLo);
187  nSelB += hbkg.at(idim)->GetBinContent(jLo);
188  nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
189  nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
190 
191  // proceed if total number of events in left and right cell
192  // is greater than fNmin
193  if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
194  (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
195  continue;
196 
197  Double_t xLo = 1.0 * jLo / fNBin;
198 
199  // calculate separation gain
200  Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
201 
202  if (gain >= maxGain) {
203  maxGain = gain;
204  xBest = xLo;
205  kBest = idim;
206  }
207  } // jLo
208  } // idim
209 
210  if (kBest >= fDim || kBest < 0) {
211  // No best division edge found! One must ensure, that this cell
212  // is not chosen for splitting in PeekMax(). But since in
213  // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
214  // should set maxGain to -1.0 (or even 0.0?) here.
215  maxGain = -1.0;
216  }
217 
218  // set cell properties
219  cell->SetBest(kBest);
220  cell->SetXdiv(xBest);
221  if (nTotB + nTotS > 0)
222  cell->SetIntg(nTotS / (nTotB + nTotS));
223  else
224  cell->SetIntg(0.0);
225  cell->SetDriv(maxGain);
226  cell->CalcVolume();
227 
228  // set cell element 0 (total number of events in cell) during
229  // build-up
230  if (GetNmin() > 0)
231  SetCellElement(cell, 0, nTotS + nTotB);
232 
233  // clean up
234  for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
235  for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
236  for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
237  for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
238 }
This is a concrete implementation of PDEFoam.
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Int_t fDim
Definition: PDEFoam.h:82
Double_t * fXmax
Definition: PDEFoam.h:105
Basic string class.
Definition: TString.h:129
int Int_t
Definition: RtypesCore.h:41
void SetCellElement(PDEFoamCell *cell, UInt_t i, Double_t value)
Set cell element i of cell to value.
Definition: PDEFoam.cxx:1433
#define NULL
Definition: RtypesCore.h:88
void SetBest(Int_t Best)
Definition: PDEFoamCell.h:79
Volume for BinarySearchTree.
Definition: Volume.h:48
Int_t GetTotDim() const
Definition: PDEFoam.h:195
UInt_t GetNmin()
Definition: PDEFoam.h:204
PDEFoamDensityBase * fDistr
Definition: PDEFoam.h:113
Int_t fNBin
Definition: PDEFoam.h:85
virtual void FillHistograms(TMVA::Volume &, std::vector< TH1D *> &, std::vector< TH1D *> &, std::vector< TH1D *> &, std::vector< TH1D *> &)
Fill the given histograms with signal and background events, which are found in the volume...
Float_t VarTransformInvers(Int_t idim, Float_t x) const
Definition: PDEFoam.h:296
void SetXdiv(Double_t Xdiv)
Definition: PDEFoamCell.h:80
virtual void Explore(PDEFoamCell *Cell)
Internal subprogram used by Create.
PDEFoamDecisionTree()
Default constructor for streamer, user should not use it.
void CalcVolume()
Calculates volume of the cell using size params which are calculated.
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void SetDriv(Double_t Driv)
Definition: PDEFoamCell.h:89
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
MsgLogger & Log() const
Definition: PDEFoam.h:238
Double_t * fXmin
Definition: PDEFoam.h:104
REAL epsilon
Definition: triangle.c:617
This PDEFoam variant acts like a decision tree and stores in every cell the discriminant.
#define ClassImp(name)
Definition: Rtypes.h:336
double Double_t
Definition: RtypesCore.h:55
void GetHcub(PDEFoamVect &, PDEFoamVect &) const
Provides size and position of the cell These parameter are calculated by analyzing information in all...
void SetIntg(Double_t Intg)
Definition: PDEFoamCell.h:88
Abstract ClassifierFactory template that handles arbitrary types.
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:310
This PDEFoam variant stores in every cell the discriminant.
virtual ~PDEFoamDecisionTree()
Destructor deletes fSepType.