ROOT  6.06/09
Reference Guide
PDEFoamDecisionTree.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Alexander Voigt
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Classes: PDEFoamDecisionTree *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation of decision tree like PDEFoam *
12  * *
13  * Authors (alphabetical): *
14  * Tancredi Carli - CERN, Switzerland *
15  * Dominik Dannheim - CERN, Switzerland *
16  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
17  * Alexander Voigt - TU Dresden, Germany *
18  * Peter Speckmayer - CERN, Switzerland *
19  * *
20  * Copyright (c) 2010: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 //_____________________________________________________________________
30 //
31 // PDEFoamDecisionTree
32 //
33 // This PDEFoam variant acts like a decision tree and stores in every
34 // cell the discriminant
35 //
36 // D = #events with given class / total number of events
37 //
38 // as well as the statistical error on the discriminant. It therefore
39 // acts as a discriminant estimator. The decision tree-like behaviour
40 // is achieved by overriding PDEFoamDiscriminant::Explore() to use a
41 // decision tree-like cell splitting algorithm (given a separation
42 // type).
43 //
44 // This PDEFoam variant should be booked together with the
45 // PDEFoamDecisionTreeDensity density estimator, which returns the
46 // events in a cell without sampling.
47 //
48 //_____________________________________________________________________
49 
50 #ifndef ROOT_TMVA_PDEFoamDecisionTree
52 #endif
53 #ifndef ROOT_TMVA_PDEFoamDecisionTreeDensity
55 #endif
56 
58 
59 ////////////////////////////////////////////////////////////////////////////////
60 /// Default constructor for streamer, user should not use it.
61 
62 TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree()
63  : PDEFoamDiscriminant()
64  , fSepType(NULL)
65 {
66 }
67 
68 ////////////////////////////////////////////////////////////////////////////////
69 /// Parameters:
70 ///
71 /// - name - name of the foam
72 ///
73 /// - sepType - separation type used for the cell splitting (will be
74 /// deleted in the destructor)
75 ///
76 /// - cls - class to consider as signal when calcualting the purity
77 
79  : PDEFoamDiscriminant(name, cls)
80  , fSepType(sepType)
81 {
82 }
83 
84 ////////////////////////////////////////////////////////////////////////////////
85 /// Copy Constructor NOT IMPLEMENTED (NEVER USED)
86 
88  : PDEFoamDiscriminant(from)
89  , fSepType(from.fSepType)
90 {
91  Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
92 }
93 
94 ////////////////////////////////////////////////////////////////////////////////
95 /// Destructor
96 /// deletes fSepType
97 
99 {
100  if (fSepType)
101  delete fSepType;
102 }
103 
104 ////////////////////////////////////////////////////////////////////////////////
105 /// Internal subprogram used by Create. It explores newly defined
106 /// cell with according to the decision tree logic. The separation
107 /// set via the 'sepType' option in the constructor.
108 ///
109 /// The optimal division point for eventual future cell division is
110 /// determined/recorded. Note that links to parents and initial
111 /// volume = 1/2 parent has to be already defined prior to calling
112 /// this routine.
113 ///
114 /// Note, that according to the decision tree logic, a cell is only
115 /// split, if the number of (unweighted) events in each dautghter
116 /// cell is greater than fNmin.
117 
119 {
120  if (!cell)
121  Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
122 
123  // create edge histograms
124  std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
125  hsig.reserve(fDim);
126  hbkg.reserve(fDim);
127  hsig_unw.reserve(fDim);
128  hbkg_unw.reserve(fDim);
129  for (Int_t idim = 0; idim < fDim; idim++) {
130  hsig.push_back(new TH1D(Form("hsig_%i", idim),
131  Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
132  hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
133  Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
134  hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
135  Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
136  hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
137  Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
138  }
139 
140  // get cell position and size
141  PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
142  cell->GetHcub(cellPosi, cellSize);
143 
144  // determine lower and upper cell bound
145  std::vector<Double_t> lb(GetTotDim()); // lower bound
146  std::vector<Double_t> ub(GetTotDim()); // upper bound
147  for (Int_t idim = 0; idim < GetTotDim(); idim++) {
148  lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
149  ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
150  }
151 
152  // fDistr must be of type PDEFoamDecisionTreeDensity*
153  PDEFoamDecisionTreeDensity *distr = dynamic_cast<PDEFoamDecisionTreeDensity*>(fDistr);
154  if (distr == NULL)
155  Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
156  << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
157 
158  // create TMVA::Volume object needed for searching within the BST
159  TMVA::Volume volume(&lb, &ub);
160 
161  // fill the signal and background histograms for the given volume
162  distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
163 
164  // ------ determine the best division edge
165  Double_t xBest = 0.5; // best division point
166  Int_t kBest = -1; // best split dimension
167  Double_t maxGain = -1.0; // maximum gain
168  Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
169  Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
170  Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
171  Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
172 
173  for (Int_t idim = 0; idim < fDim; ++idim) {
174  Double_t nSelS = hsig.at(idim)->GetBinContent(0);
175  Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
176  Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
177  Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
178  for (Int_t jLo = 1; jLo < fNBin; jLo++) {
179  nSelS += hsig.at(idim)->GetBinContent(jLo);
180  nSelB += hbkg.at(idim)->GetBinContent(jLo);
181  nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
182  nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
183 
184  // proceed if total number of events in left and right cell
185  // is greater than fNmin
186  if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
187  (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
188  continue;
189 
190  Double_t xLo = 1.0 * jLo / fNBin;
191 
192  // calculate separation gain
193  Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
194 
195  if (gain >= maxGain) {
196  maxGain = gain;
197  xBest = xLo;
198  kBest = idim;
199  }
200  } // jLo
201  } // idim
202 
203  if (kBest >= fDim || kBest < 0) {
204  // No best division edge found! One must ensure, that this cell
205  // is not chosen for splitting in PeekMax(). But since in
206  // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
207  // should set maxGain to -1.0 (or even 0.0?) here.
208  maxGain = -1.0;
209  }
210 
211  // set cell properties
212  cell->SetBest(kBest);
213  cell->SetXdiv(xBest);
214  if (nTotB + nTotS > 0)
215  cell->SetIntg(nTotS / (nTotB + nTotS));
216  else
217  cell->SetIntg(0.0);
218  cell->SetDriv(maxGain);
219  cell->CalcVolume();
220 
221  // set cell element 0 (total number of events in cell) during
222  // build-up
223  if (GetNmin() > 0)
224  SetCellElement(cell, 0, nTotS + nTotB);
225 
226  // clean up
227  for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
228  for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
229  for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
230  for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
231 }
ClassImp(TMVA::PDEFoamDecisionTree) TMVA
Default constructor for streamer, user should not use it.
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
MsgLogger & Log() const
Definition: PDEFoam.h:265
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
virtual void FillHistograms(TMVA::Volume &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &)
Fill the given histograms with signal and background events, which are found in the volume...
void SetBest(Int_t Best)
Definition: PDEFoamCell.h:85
THist< 1, double > TH1D
Definition: THist.h:314
void SetXdiv(Double_t Xdiv)
Definition: PDEFoamCell.h:86
virtual void Explore(PDEFoamCell *Cell)
Internal subprogram used by Create.
void CalcVolume()
Calculates volume of the cell using size params which are calculated.
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void SetDriv(Double_t Driv)
Definition: PDEFoamCell.h:95
REAL epsilon
Definition: triangle.c:617
double Double_t
Definition: RtypesCore.h:55
#define name(a, b)
Definition: linkTestLib0.cpp:5
void SetIntg(Double_t Intg)
Definition: PDEFoamCell.h:94
Abstract ClassifierFactory template that handles arbitrary types.
#define NULL
Definition: Rtypes.h:82
void GetHcub(PDEFoamVect &, PDEFoamVect &) const
Provides size and position of the cell These parameter are calculated by analyzing information in all...
Definition: math.cpp:60
virtual ~PDEFoamDecisionTree()
Destructor deletes fSepType.