Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
PDEFoamDecisionTree.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Alexander Voigt
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Classes: PDEFoamDecisionTree *
8 * *
9 * *
10 * Description: *
11 * Implementation of decision tree like PDEFoam *
12 * *
13 * Authors (alphabetical): *
14 * Tancredi Carli - CERN, Switzerland *
15 * Dominik Dannheim - CERN, Switzerland *
16 * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
17 * Alexander Voigt - TU Dresden, Germany *
18 * Peter Speckmayer - CERN, Switzerland *
19 * *
20 * Copyright (c) 2010: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (see tmva/doc/LICENSE) *
27 **********************************************************************************/
28
29/*! \class TMVA::PDEFoamDecisionTree
30\ingroup TMVA
31
32This PDEFoam variant acts like a decision tree and stores in every
33cell the discriminant
34
35 D = #events with given class / total number of events
36
37as well as the statistical error on the discriminant. It therefore
38acts as a discriminant estimator. The decision tree-like behaviour
39is achieved by overriding PDEFoamDiscriminant::Explore() to use a
40decision tree-like cell splitting algorithm (given a separation
41type).
42
43This PDEFoam variant should be booked together with the
44PDEFoamDecisionTreeDensity density estimator, which returns the
45events in a cell without sampling.
46*/
47
50
51#include "TMVA/MsgLogger.h"
52#include "TMVA/PDEFoamCell.h"
54#include "TMVA/PDEFoamVect.h"
55#include "TMVA/SeparationBase.h"
56#include "TMVA/Types.h"
57#include "TMVA/Volume.h"
58
59#include "Rtypes.h"
60#include "TH1D.h"
61
62class TString;
63
64
65////////////////////////////////////////////////////////////////////////////////
66/// Default constructor for streamer, user should not use it.
67
73
74////////////////////////////////////////////////////////////////////////////////
75/// Parameters:
76///
77/// - name - name of the foam
78///
79/// - sepType - separation type used for the cell splitting (will be
80/// deleted in the destructor)
81///
82/// - cls - class to consider as signal when calculating the purity
83
89
90////////////////////////////////////////////////////////////////////////////////
91/// Copy Constructor NOT IMPLEMENTED (NEVER USED)
92
95 , fSepType(from.fSepType)
96{
97 Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
98}
99
100////////////////////////////////////////////////////////////////////////////////
101/// Destructor
102/// deletes fSepType
103
105{
 // Release the separation object held through fSepType.
 // Deleting a null pointer is a no-op in C++, so the guard below is
 // redundant but harmless.
106 if (fSepType)
107 delete fSepType;
108}
109
110////////////////////////////////////////////////////////////////////////////////
111/// Internal subprogram used by Create. It explores a newly defined
112/// cell according to the decision tree logic. The separation type
113/// is set via the 'sepType' option in the constructor.
114///
115/// The optimal division point for eventual future cell division is
116/// determined/recorded. Note that links to parents and initial
117/// volume = 1/2 parent have to be already defined prior to calling
118/// this routine.
119///
120/// Note that, according to the decision tree logic, a cell is only
121/// split if the number of (unweighted) events in each daughter
122/// cell is greater than or equal to fNmin.
123
125{
 // Overview of this body:
 //  1. book four histograms per dimension (weighted/unweighted, signal/background),
 //  2. let the density object fill them for the volume spanned by the cell,
 //  3. scan every bin edge in every dimension for the largest separation gain,
 //  4. store the best split, the signal fraction and the gain on the cell,
 //  5. delete the histograms again.
 //
 // NOTE(review): after the kFATAL message the code goes on to dereference
 // `cell`; presumably logging with kFATAL aborts/throws -- confirm in MsgLogger.
126 if (!cell)
127 Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
128
129 // create edge histograms: one per dimension and category, each with fNBin
 // bins over [fXmin[idim], fXmax[idim]]. They are allocated with `new` and
 // owned by these local vectors until the clean-up loops at the end.
130 std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
131 hsig.reserve(fDim);
132 hbkg.reserve(fDim);
133 hsig_unw.reserve(fDim);
134 hbkg_unw.reserve(fDim);
135 for (Int_t idim = 0; idim < fDim; idim++) {
136 hsig.push_back(new TH1D(TString::Format("hsig_%i", idim),
137 TString::Format("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
138 hbkg.push_back(new TH1D(TString::Format("hbkg_%i", idim),
139 TString::Format("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
140 hsig_unw.push_back(new TH1D(TString::Format("hsig_unw_%i", idim),
141 TString::Format("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
142 hbkg_unw.push_back(new TH1D(TString::Format("hbkg_unw_%i", idim),
143 TString::Format("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
144 }
145
146 // get cell position and size
147 PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
148 cell->GetHcub(cellPosi, cellSize);
149
150 // determine lower and upper cell bound; each bound is pushed outwards by
 // float epsilon before being passed through VarTransformInvers()
151 std::vector<Double_t> lb(GetTotDim()); // lower bound
152 std::vector<Double_t> ub(GetTotDim()); // upper bound
153 for (Int_t idim = 0; idim < GetTotDim(); idim++) {
154 lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
155 ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
156 }
157
158 // fDistr must be of type PDEFoamDecisionTreeDensity*
 // NOTE(review): the declaration of `distr` (listing line 159) is not visible
 // here; judging by the message below it is the result of casting
 // PDEFoamDensityBase* to PDEFoamDecisionTreeDensity* -- confirm.
160 if (distr == NULL)
161 Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
162 << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
163
164 // create TMVA::Volume object needed for searching within the BST
165 TMVA::Volume volume(&lb, &ub);
166
167 // fill the signal and background histograms for the given volume
168 distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
169
170 // ------ determine the best division edge
171 Double_t xBest = 0.5; // best division point
172 Int_t kBest = -1; // best split dimension
173 Double_t maxGain = -1.0; // maximum gain
 // Totals are taken from the dimension-0 histograms over bins
 // 0 .. GetNbinsX()+1 (in ROOT's TH1 convention this includes the underflow
 // and overflow bins). `.at(0)` throws std::out_of_range if fDim is 0.
174 Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
175 Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
176 Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
177 Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
178
179 for (Int_t idim = 0; idim < fDim; ++idim) {
 // running sums of the "left" side of the candidate split, seeded with bin 0
180 Double_t nSelS = hsig.at(idim)->GetBinContent(0);
181 Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
182 Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
183 Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
 // jLo runs over bins 1 .. fNBin-1; after adding bin jLo the sums cover
 // bins 0 .. jLo, i.e. everything below the upper edge of bin jLo
184 for (Int_t jLo = 1; jLo < fNBin; jLo++) {
185 nSelS += hsig.at(idim)->GetBinContent(jLo);
186 nSelB += hbkg.at(idim)->GetBinContent(jLo);
187 nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
188 nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
189
190 // proceed only if the unweighted number of events in both the left
191 // and the right cell is at least GetNmin() (the test below uses >=)
192 if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
193 (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
194 continue;
195
 // split position as a fraction of the histogram axis (upper edge of bin jLo)
196 Double_t xLo = 1.0 * jLo / fNBin;
197
198 // calculate separation gain
199 Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
200
 // `>=` means that on equal gain the later candidate (higher bin, then
 // higher dimension) replaces the earlier one
201 if (gain >= maxGain) {
202 maxGain = gain;
203 xBest = xLo;
204 kBest = idim;
205 }
206 } // jLo
207 } // idim
208
 // kBest is only assigned inside the loop above together with maxGain, so
 // when this branch is taken (kBest still -1) maxGain is already -1.0; the
 // assignment is defensive.
209 if (kBest >= fDim || kBest < 0) {
210 // No best division edge found! One must ensure, that this cell
211 // is not chosen for splitting in PeekMax(). But since in
212 // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
213 // should set maxGain to -1.0 (or even 0.0?) here.
214 maxGain = -1.0;
215 }
216
217 // set cell properties: best split dimension and position, the weighted
 // signal fraction nTotS / (nTotS + nTotB) (0.0 for an empty cell) as
 // integral, and the maximum gain as driver value
218 cell->SetBest(kBest);
219 cell->SetXdiv(xBest);
220 if (nTotB + nTotS > 0)
221 cell->SetIntg(nTotS / (nTotB + nTotS));
222 else
223 cell->SetIntg(0.0);
224 cell->SetDriv(maxGain);
225 cell->CalcVolume();
226
227 // set cell element 0 (total number of events in cell) during
228 // build-up; the stored value is the weighted sum nTotS + nTotB
229 if (GetNmin() > 0)
230 SetCellElement(cell, 0, nTotS + nTotB);
231
232 // clean up: delete every histogram allocated at the top of this function
233 for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
234 for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
235 for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
236 for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
237}
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
char name[80]
Definition TGX11.cxx:110
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:927
This is a concrete implementation of PDEFoam.
This PDEFoam variant acts like a decision tree and stores in every cell the discriminant.
void Explore(PDEFoamCell *Cell) override
Internal subprogram used by Create.
PDEFoamDecisionTree()
Default constructor for streamer, user should not use it.
virtual ~PDEFoamDecisionTree()
Destructor deletes fSepType.
This PDEFoam variant stores in every cell the discriminant.
MsgLogger & Log() const
Definition PDEFoam.h:240
An interface to calculate the "SeparationGain" for different separation criteria used in various training algorithms.
Volume for BinarySearchTree.
Definition Volume.h:47
Basic string class.
Definition TString.h:138
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148