ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
PDEFoamDecisionTree.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Alexander Voigt
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Classes: PDEFoamDecisionTree *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation of decision tree like PDEFoam *
12  * *
13  * Authors (alphabetical): *
14  * Tancredi Carli - CERN, Switzerland *
15  * Dominik Dannheim - CERN, Switzerland *
16  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
17  * Alexander Voigt - TU Dresden, Germany *
18  * Peter Speckmayer - CERN, Switzerland *
19  * *
20  * Copyright (c) 2010: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 //_____________________________________________________________________
30 //
31 // PDEFoamDecisionTree
32 //
33 // This PDEFoam variant acts like a decision tree and stores in every
34 // cell the discriminant
35 //
36 // D = #events with given class / total number of events
37 //
38 // as well as the statistical error on the discriminant. It therefore
39 // acts as a discriminant estimator. The decision tree-like behaviour
40 // is achieved by overriding PDEFoamDiscriminant::Explore() to use a
41 // decision tree-like cell splitting algorithm (given a separation
42 // type).
43 //
44 // This PDEFoam variant should be booked together with the
45 // PDEFoamDecisionTreeDensity density estimator, which returns the
46 // events in a cell without sampling.
47 //
48 //_____________________________________________________________________
49 
50 #ifndef ROOT_TMVA_PDEFoamDecisionTree
52 #endif
53 #ifndef ROOT_TMVA_PDEFoamDecisionTreeDensity
55 #endif
56 
57 #include "TMVA/MsgLogger.h"
58 #include "TMVA/PDEFoamCell.h"
60 #include "TMVA/PDEFoamVect.h"
61 #include "TMVA/SeparationBase.h"
62 #include "TMVA/Types.h"
63 #include "TMVA/Volume.h"
64 
65 #include "Rtypes.h"
66 #include "TH1D.h"
67 
68 class TString;
69 
71 
72 ////////////////////////////////////////////////////////////////////////////////
73 /// Default constructor for streamer, user should not use it.
74 
75 TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree()
76  : PDEFoamDiscriminant()
77  , fSepType(NULL)
78 {
79 }
80 
81 ////////////////////////////////////////////////////////////////////////////////
82 /// Parameters:
83 ///
84 /// - name - name of the foam
85 ///
86 /// - sepType - separation type used for the cell splitting (will be
87 /// deleted in the destructor)
88 ///
89 /// - cls - class to consider as signal when calcualting the purity
90 
92  : PDEFoamDiscriminant(name, cls)
93  , fSepType(sepType)
94 {
95 }
96 
97 ////////////////////////////////////////////////////////////////////////////////
98 /// Copy Constructor NOT IMPLEMENTED (NEVER USED)
99 
101  : PDEFoamDiscriminant(from)
102  , fSepType(from.fSepType)
103 {
104  Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
105 }
106 
107 ////////////////////////////////////////////////////////////////////////////////
108 /// Destructor
109 /// deletes fSepType
110 
112 {
113  if (fSepType)
114  delete fSepType;
115 }
116 
117 ////////////////////////////////////////////////////////////////////////////////
118 /// Internal subprogram used by Create. It explores newly defined
119 /// cell with according to the decision tree logic. The separation
120 /// set via the 'sepType' option in the constructor.
121 ///
122 /// The optimal division point for eventual future cell division is
123 /// determined/recorded. Note that links to parents and initial
124 /// volume = 1/2 parent has to be already defined prior to calling
125 /// this routine.
126 ///
127 /// Note, that according to the decision tree logic, a cell is only
128 /// split, if the number of (unweighted) events in each dautghter
129 /// cell is greater than fNmin.
130 
132 {
133  if (!cell)
134  Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
135 
136  // create edge histograms
137  std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
138  hsig.reserve(fDim);
139  hbkg.reserve(fDim);
140  hsig_unw.reserve(fDim);
141  hbkg_unw.reserve(fDim);
142  for (Int_t idim = 0; idim < fDim; idim++) {
143  hsig.push_back(new TH1D(Form("hsig_%i", idim),
144  Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
145  hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
146  Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
147  hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
148  Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
149  hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
150  Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
151  }
152 
153  // get cell position and size
154  PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
155  cell->GetHcub(cellPosi, cellSize);
156 
157  // determine lower and upper cell bound
158  std::vector<Double_t> lb(GetTotDim()); // lower bound
159  std::vector<Double_t> ub(GetTotDim()); // upper bound
160  for (Int_t idim = 0; idim < GetTotDim(); idim++) {
161  lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
162  ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
163  }
164 
165  // fDistr must be of type PDEFoamDecisionTreeDensity*
167  if (distr == NULL)
168  Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
169  << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
170 
171  // create TMVA::Volume object needed for searching within the BST
172  TMVA::Volume volume(&lb, &ub);
173 
174  // fill the signal and background histograms for the given volume
175  distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
176 
177  // ------ determine the best division edge
178  Double_t xBest = 0.5; // best division point
179  Int_t kBest = -1; // best split dimension
180  Double_t maxGain = -1.0; // maximum gain
181  Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
182  Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
183  Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
184  Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
185 
186  for (Int_t idim = 0; idim < fDim; ++idim) {
187  Double_t nSelS = hsig.at(idim)->GetBinContent(0);
188  Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
189  Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
190  Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
191  for (Int_t jLo = 1; jLo < fNBin; jLo++) {
192  nSelS += hsig.at(idim)->GetBinContent(jLo);
193  nSelB += hbkg.at(idim)->GetBinContent(jLo);
194  nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
195  nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
196 
197  // proceed if total number of events in left and right cell
198  // is greater than fNmin
199  if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
200  (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
201  continue;
202 
203  Double_t xLo = 1.0 * jLo / fNBin;
204 
205  // calculate separation gain
206  Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
207 
208  if (gain >= maxGain) {
209  maxGain = gain;
210  xBest = xLo;
211  kBest = idim;
212  }
213  } // jLo
214  } // idim
215 
216  if (kBest >= fDim || kBest < 0) {
217  // No best division edge found! One must ensure, that this cell
218  // is not chosen for splitting in PeekMax(). But since in
219  // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
220  // should set maxGain to -1.0 (or even 0.0?) here.
221  maxGain = -1.0;
222  }
223 
224  // set cell properties
225  cell->SetBest(kBest);
226  cell->SetXdiv(xBest);
227  if (nTotB + nTotS > 0)
228  cell->SetIntg(nTotS / (nTotB + nTotS));
229  else
230  cell->SetIntg(0.0);
231  cell->SetDriv(maxGain);
232  cell->CalcVolume();
233 
234  // set cell element 0 (total number of events in cell) during
235  // build-up
236  if (GetNmin() > 0)
237  SetCellElement(cell, 0, nTotS + nTotB);
238 
239  // clean up
240  for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
241  for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
242  for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
243  for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
244 }
ClassImp(TMVA::PDEFoamDecisionTree) TMVA
Default constructor for streamer, user should not use it.
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
MsgLogger & Log() const
Definition: PDEFoam.h:262
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
virtual void FillHistograms(TMVA::Volume &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &)
Fill the given histograms with signal and background events, which are found in the volume...
void SetBest(Int_t Best)
Definition: PDEFoamCell.h:85
double distr(double *x, double *p)
Definition: unuranDemo.C:104
void SetXdiv(Double_t Xdiv)
Definition: PDEFoamCell.h:86
virtual void Explore(PDEFoamCell *Cell)
Internal subprogram used by Create.
void CalcVolume()
Calculates volume of the cell using size params which are calculated.
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void SetDriv(Double_t Driv)
Definition: PDEFoamCell.h:95
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:613
REAL epsilon
Definition: triangle.c:617
double Double_t
Definition: RtypesCore.h:55
#define name(a, b)
Definition: linkTestLib0.cpp:5
void SetIntg(Double_t Intg)
Definition: PDEFoamCell.h:94
#define NULL
Definition: Rtypes.h:82
void GetHcub(PDEFoamVect &, PDEFoamVect &) const
Provides size and position of the cell These parameter are calculated by analyzing information in all...
Definition: math.cpp:60
virtual ~PDEFoamDecisionTree()
Destructor deletes fSepType.