Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodCuts.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodCuts *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Multivariate optimisation of signal efficiency for given background *
12 * efficiency, using rectangular minimum and maximum requirements on *
13 * input variables *
14 * *
15 * Authors (alphabetical): *
16 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17 * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
18 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
19 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21 * *
22 * Copyright (c) 2005: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * LAPP, Annecy, France *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (http://tmva.sourceforge.net/LICENSE) *
31 **********************************************************************************/
32
33#ifndef ROOT_TMVA_MethodCuts
34#define ROOT_TMVA_MethodCuts
35
36//////////////////////////////////////////////////////////////////////////
37// //
38// MethodCuts //
39// //
40// Multivariate optimisation of signal efficiency for given background //
41// efficiency, using rectangular minimum and maximum requirements on //
42// input variables //
43// //
44//////////////////////////////////////////////////////////////////////////
45
46#include <vector>
47
48
49#include "TMVA/MethodBase.h"
51#include "TMVA/PDF.h"
52#include "TMatrixDfwd.h"
53#include "IFitterTarget.h"
54
55class TRandom;
56
57namespace TMVA {
58
59 class Interval;
60
61 class MethodCuts : public MethodBase, public IFitterTarget {
   // Cut-based classifier: multivariate optimisation of signal efficiency for
   // given background efficiency, using rectangular minimum and maximum
   // requirements on the input variables. Derives from MethodBase (base class
   // of the MVA methods) and from IFitterTarget (interface for a fitter target).
62
63 public:
64
   // constructor taking the job name, the method title, the data set
   // information and an option string (default: "MC:150:10000:")
65 MethodCuts( const TString& jobName,
66 const TString& methodTitle,
67 DataSetInfo& theData,
68 const TString& theOption = "MC:150:10000:");
69
   // constructor taking the data set information and a weight file
   // (presumably used when the method is restored from stored weights — TODO confirm)
70 MethodCuts( DataSetInfo& theData,
71 const TString& theWeightFile);
72
73 // this is a workaround which is necessary since CINT is not capable of handling dynamic casts;
   // it returns the result of dynamic_cast<MethodCuts*>(method), i.e. a null pointer
   // when 'method' does not point to a MethodCuts
74 static MethodCuts* DynamicCast( IMethod* method ) { return dynamic_cast<MethodCuts*>(method); }
75
   // destructor
76 virtual ~MethodCuts( void );
77
   // reports whether the requested analysis type is supported; the implementation
   // documents that cuts can only handle classification with 2 classes
78 virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
79
80 // training method: the cuts are optimised for the training sample
81 void Train( void );
82
84
   // XML output of the method (attached to the given parent node)
85 void AddWeightsXMLTo ( void* parent ) const;
86
   // input of the cuts from a stream, or from a node of an XML weight file
87 void ReadWeightsFromStream( std::istream & i );
88 void ReadWeightsFromXML ( void* wghtnode );
89
90 // cut evaluation: the implementation documents this as returning 1.0 if the
   // event passed and 0.0 otherwise; both error pointers are optional (default 0)
91 Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
92
93 // write method specific histos to target file
94 void WriteMonitoringHistosToFile( void ) const;
95
96 // test the method (the implementation documents this as "nothing to test")
97 void TestClassification();
98
99 // also overridden --> these quantities are not computed for cuts,
   // so each of the following functions simply returns -1
100 Double_t GetSeparation ( TH1*, TH1* ) const { return -1; }
101 Double_t GetSeparation ( PDF* = 0, PDF* = 0 ) const { return -1; }
102 Double_t GetSignificance( void ) const { return -1; }
103 Double_t GetmuTransform ( TTree *) { return -1; }
106
107 // rarity distributions (signal or background (default) is uniform in [0,1])
109
110 // accessors for Minuit
   // (the implementation documents ComputeEstimator and the std::vector overload of
   // EstimatorFunction as returning the estimator for "cut fitness" used by GA)
111 Double_t ComputeEstimator( std::vector<Double_t> & );
112
113 Double_t EstimatorFunction( std::vector<Double_t> & );
114 Double_t EstimatorFunction( Int_t ievt1, Int_t ievt2 );
115
117
118 // retrieve cut values for given signal efficiency
   // (PrintCuts prints them; GetCuts exists in a std::vector and a raw-array form)
119 void PrintCuts( Double_t effS ) const;
120 Double_t GetCuts ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
121 Double_t GetCuts ( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const;
122
123 // ranking of input variables (not available for cuts, hence a null pointer is returned)
124 const Ranking* CreateRanking() { return 0; }
125
   // define the options (their key words) that can be set in the option string,
   // and process the user options
126 void DeclareOptions();
127 void ProcessOptions();
128
129 // maximum |cut| value
131
132 // no check of options at this place (intentionally empty)
133 void CheckSetup() {}
134
135 protected:
136
137 // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
138 void MakeClassSpecific( std::ostream&, const TString& ) const;
139
140 // get help message text
141 void GetHelpMessage() const;
142
143 private:
144
   // NOTE(review): the types EFitMethodType, EEffMethod and EFitParameters used by the
   // data members below are not declared in the lines visible here — confirm that
   // their declarations belong to the three comment headers that follow
145 // optimisation method
152
153 // efficiency calculation method
154 // - kUseEventSelection: computes efficiencies from given data sample
155 // - kUsePDFs : creates smoothed PDFs from data samples, and
156 // uses this to compute efficiencies
159
160 // improve the Monte Carlo by providing some additional information
165
166 // general
167 TString fFitMethodS; // chosen fit method (string)
168 EFitMethodType fFitMethod; // chosen fit method
169 TString fEffMethodS; // chosen efficiency calculation method (string)
170 EEffMethod fEffMethod; // chosen efficiency calculation method
171 std::vector<EFitParameters>* fFitParams; // vector for series of fit methods
172 Double_t fTestSignalEff; // used to test optimized signal efficiency
173 Double_t fEffSMin; // presumably lower bound on the signal efficiency (the original comment duplicated the one of fTestSignalEff) — TODO confirm
174 Double_t fEffSMax; // presumably upper bound on the signal efficiency (the original comment duplicated the one of fTestSignalEff) — TODO confirm
175 Double_t* fCutRangeMin; // minimum of allowed cut range
176 Double_t* fCutRangeMax; // maximum of allowed cut range
177 std::vector<Interval*> fCutRange; // allowed ranges for cut optimisation
178
179 // for the use of the binary tree method
182
183 // MC method
184 Double_t** fCutMin; // minimum requirement
185 Double_t** fCutMax; // maximum requirement
186 Double_t* fTmpCutMin; // temporary minimum requirement
187 Double_t* fTmpCutMax; // temporary maximum requirement
188 TString* fAllVarsI; // what to do with variables
189
190 // relevant for all methods
191 Int_t fNpar; // number of parameters in fit (default: 2*Nvar)
192 Double_t fEffRef; // reference efficiency
193 std::vector<Int_t>* fRangeSign; // used to match cuts to fit parameters (and vice versa)
194 TRandom* fRandom; // random generator for MC optimisation method
195
196 // basic statistics
197 std::vector<Double_t>* fMeanS; // means of variables (signal)
198 std::vector<Double_t>* fMeanB; // means of variables (background)
199 std::vector<Double_t>* fRmsS; // RMSs of variables (signal)
200 std::vector<Double_t>* fRmsB; // RMSs of variables (background)
201
202 TH1* fEffBvsSLocal; // intermediate eff. background versus eff signal histo
203
204 // PDF section
205 std::vector<TH1*>* fVarHistS; // reference histograms (signal)
206 std::vector<TH1*>* fVarHistB; // reference histograms (background)
207 std::vector<TH1*>* fVarHistS_smooth; // smoothed reference histograms (signal)
208 std::vector<TH1*>* fVarHistB_smooth; // smoothed reference histograms (background)
209 std::vector<PDF*>* fVarPdfS; // reference PDFs (signal)
210 std::vector<PDF*>* fVarPdfB; // reference PDFs (background)
211
212 // negative efficiencies
213 Bool_t fNegEffWarning; // flag raised in case of negative efficiency warning
214
215
216 // the definition of fit parameters can be different from the actual
217 // cut requirements; these functions provide the matching:
   // MatchParsToCuts translates parameters into cuts,
   // MatchCutsToPars translates cuts into parameters
218 void MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* );
220
221 void MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* );
222 void MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin );
223
224 // creates the efficiency reference histograms and PDFs in case these are used
225 // to compute efficiencies (corresponds to: EffMethod == kUsePDFs)
226 void CreateVariablePDFs( void );
227
228 // returns signal and background efficiencies (via effS, effB) for given cuts - using event counting
229 void GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
230 Double_t& effS, Double_t& effB );
231 // returns signal and background efficiencies (via effS, effB) for given cuts - using PDFs
232 void GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
233 Double_t& effS, Double_t& effB );
234
235 // default initialisation method called by all constructors
236 void Init( void );
237
238 ClassDef(MethodCuts,0); // Multivariate optimisation of signal efficiency
239 };
240
241} // namespace TMVA
242
243#endif
unsigned int UInt_t
Definition RtypesCore.h:46
bool Bool_t
Definition RtypesCore.h:63
double Double_t
Definition RtypesCore.h:59
#define ClassDef(name, id)
Definition Rtypes.h:325
int type
Definition TGX11.cxx:121
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:58
A simple Binary search tree including a volume search method.
Class that contains all the data information.
Definition DataSetInfo.h:62
Interface for a fitter 'target'.
Interface for all concrete MVA method implementations.
Definition IMethod.h:53
Virtual base Class for all MVA method.
Definition MethodBase.h:111
friend class MethodCuts
Definition MethodBase.h:603
virtual void ReadWeightsFromStream(std::istream &)=0
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular minimum and maximum requirements on input variables.
Definition MethodCuts.h:61
TRandom * fRandom
Definition MethodCuts.h:194
TString fFitMethodS
Definition MethodCuts.h:167
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA.
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
BinarySearchTree * fBinaryTreeS
Definition MethodCuts.h:180
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
void SetTestSignalEfficiency(Double_t effS)
Definition MethodCuts.h:116
std::vector< Int_t > * fRangeSign
Definition MethodCuts.h:193
void DeclareOptions()
define the options (their key words) that can be set in the option string.
TString fEffMethodS
Definition MethodCuts.h:169
EFitMethodType fFitMethod
Definition MethodCuts.h:168
const Ranking * CreateRanking()
Definition MethodCuts.h:124
Bool_t fNegEffWarning
Definition MethodCuts.h:213
Double_t fEffSMin
Definition MethodCuts.h:173
Double_t * fCutRangeMax
Definition MethodCuts.h:176
Double_t GetSignificance(void) const
compute significance of mean difference
Definition MethodCuts.h:102
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
static MethodCuts * DynamicCast(IMethod *method)
Definition MethodCuts.h:74
void GetHelpMessage() const
get help message text
void Train(void)
training method: here the cuts are optimised for the training sample
Double_t GetRarity(Double_t, Types::ESBType) const
compute rarity:
Definition MethodCuts.h:108
static const Double_t fgMaxAbsCutVal
Definition MethodCuts.h:130
std::vector< TH1 * > * fVarHistB
Definition MethodCuts.h:206
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
std::vector< PDF * > * fVarPdfB
Definition MethodCuts.h:210
EEffMethod fEffMethod
Definition MethodCuts.h:170
Double_t * fTmpCutMin
Definition MethodCuts.h:186
Double_t ** fCutMin
Definition MethodCuts.h:184
std::vector< TH1 * > * fVarHistS
Definition MethodCuts.h:205
std::vector< Double_t > * fRmsB
Definition MethodCuts.h:200
Double_t fEffSMax
Definition MethodCuts.h:174
std::vector< TH1 * > * fVarHistB_smooth
Definition MethodCuts.h:208
Double_t GetmuTransform(TTree *)
Definition MethodCuts.h:103
std::vector< PDF * > * fVarPdfS
Definition MethodCuts.h:209
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample
void AddWeightsXMLTo(void *parent) const
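create XML description of the cuts (the "LD classification and regression (for arbitrary number of output classes/targets)" wording does not apply here: Cuts can only handle classification with 2 classes)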
void Init(void)
default initialisation called by all constructors
Double_t GetTrainingEfficiency(const TString &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
void ProcessOptions()
process user options.
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition MethodCuts.h:133
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
virtual ~MethodCuts(void)
destructor
Double_t GetSeparation(PDF *=0, PDF *=0) const
compute "separation" defined as
Definition MethodCuts.h:101
void TestClassification()
nothing to test
Double_t * fCutRangeMin
Definition MethodCuts.h:175
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
BinarySearchTree * fBinaryTreeB
Definition MethodCuts.h:181
std::vector< Double_t > * fRmsS
Definition MethodCuts.h:199
std::vector< Double_t > * fMeanS
Definition MethodCuts.h:197
std::vector< Double_t > * fMeanB
Definition MethodCuts.h:198
TString * fAllVarsI
Definition MethodCuts.h:188
std::vector< EFitParameters > * fFitParams
Definition MethodCuts.h:171
Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
Definition MethodCuts.h:100
Double_t fTestSignalEff
Definition MethodCuts.h:172
std::vector< Interval * > fCutRange
Definition MethodCuts.h:177
Double_t * fTmpCutMax
Definition MethodCuts.h:187
std::vector< TH1 * > * fVarHistS_smooth
Definition MethodCuts.h:207
void MatchParsToCuts(Double_t *, Double_t *, Double_t *)
Double_t ** fCutMax
Definition MethodCuts.h:185
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
void PrintCuts(Double_t effS) const
print cuts
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
Ranking for variables in method (implementation)
Definition Ranking.h:48
This is the base class for the ROOT Random number generators.
Definition TRandom.h:27
Basic string class.
Definition TString.h:136
A TTree represents a columnar dataset.
Definition TTree.h:79
create variable transformations