Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodCuts.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodCuts *
8 * *
9 * *
10 * Description: *
11 * Multivariate optimisation of signal efficiency for given background *
12 * efficiency, using rectangular minimum and maximum requirements on *
13 * input variables *
14 * *
15 * Authors (alphabetical): *
16 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17 * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
18 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
19 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21 * *
22 * Copyright (c) 2005: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * LAPP, Annecy, France *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (see tmva/doc/LICENSE) *
31 **********************************************************************************/
32
33#ifndef ROOT_TMVA_MethodCuts
34#define ROOT_TMVA_MethodCuts
35
36//////////////////////////////////////////////////////////////////////////
37// //
38// MethodCuts //
39// //
40// Multivariate optimisation of signal efficiency for given background //
41// efficiency, using rectangular minimum and maximum requirements on //
42// input variables //
43// //
44//////////////////////////////////////////////////////////////////////////
45
46#include <vector>
47
48
49#include "TMVA/MethodBase.h"
51#include "TMVA/PDF.h"
52#include "TMatrixDfwd.h"
53#include "IFitterTarget.h"
54
55class TRandom;
56
57namespace TMVA {
58
59 class Interval;
60
61 class MethodCuts : public MethodBase, public IFitterTarget {
62
63 public:
64
66 const TString& methodTitle,
68 const TString& theOption = "MC:150:10000:");
69
71 const TString& theWeightFile);
72
73 virtual ~MethodCuts( void );
74
76
77 // training method
78 void Train( void ) override;
79
81
82 void AddWeightsXMLTo ( void* parent ) const override;
83
84 void ReadWeightsFromStream( std::istream & i ) override;
85 void ReadWeightsFromXML ( void* wghtnode ) override;
86
87 // calculate the MVA value (for CUTs this is just a dummy)
88 Double_t GetMvaValue( Double_t* err = nullptr, Double_t* errUpper = nullptr ) override;
89
90 // write method specific histos to target file
91 void WriteMonitoringHistosToFile( void ) const override;
92
93 // test the method
94 void TestClassification() override;
95
96 // also overridden --> not computed for cuts
97 Double_t GetSeparation ( TH1*, TH1* ) const override { return -1; }
98 Double_t GetSeparation ( PDF* = nullptr, PDF* = nullptr ) const override { return -1; }
99 Double_t GetSignificance( void ) const override { return -1; }
100 Double_t GetmuTransform ( TTree *) { return -1; }
102 Double_t GetTrainingEfficiency(const TString& ) override;
103
104 // rarity distributions (signal or background (default) is uniform in [0,1])
105 Double_t GetRarity( Double_t, Types::ESBType ) const override { return 0; }
106
107 // accessors for Minuit
108 Double_t ComputeEstimator( std::vector<Double_t> & );
109
110 Double_t EstimatorFunction( std::vector<Double_t> & ) override;
112
114
115 // retrieve cut values for given signal efficiency
116 void PrintCuts( Double_t effS ) const;
117 Double_t GetCuts ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
119
120 // ranking of input variables (not available for cuts)
121 const Ranking* CreateRanking() override { return nullptr; }
122
123 void DeclareOptions() override;
124 void ProcessOptions() override;
125
126 // maximum |cut| value
128
129 // no check of options at this place
130 void CheckSetup() override {}
131
132 protected:
133
134 // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
135 void MakeClassSpecific( std::ostream&, const TString& ) const override;
136
137 // get help message text
138 void GetHelpMessage() const override;
139
140 private:
141
142 // optimisation method
149
150 // efficiency calculation method
151 // - kUseEventSelection: computes efficiencies from given data sample
152 // - kUsePDFs : creates smoothed PDFs from data samples, and
153 // uses this to compute efficiencies
156
157 // improve the Monte Carlo by providing some additional information
162
163 // general
164 TString fFitMethodS; ///< chosen fit method (string)
165 EFitMethodType fFitMethod; ///< chosen fit method
166 TString fEffMethodS; ///< chosen efficiency calculation method (string)
167 EEffMethod fEffMethod; ///< chosen efficiency calculation method
168 std::vector<EFitParameters>* fFitParams; ///< vector for series of fit methods
169 Double_t fTestSignalEff; ///< used to test optimized signal efficiency
170 Double_t fEffSMin; ///< used to test optimized signal efficiency
171 Double_t fEffSMax; ///< used to test optimized signal efficiency
172 Double_t* fCutRangeMin; ///< minimum of allowed cut range
173 Double_t* fCutRangeMax; ///< maximum of allowed cut range
174 std::vector<Interval*> fCutRange; ///< allowed ranges for cut optimisation
175
176 // for the use of the binary tree method
179
180 // MC method
181 Double_t** fCutMin; ///< minimum requirement
182 Double_t** fCutMax; ///< maximum requirement
183 Double_t* fTmpCutMin; ///< temporary minimum requirement
184 Double_t* fTmpCutMax; ///< temporary maximum requirement
185 TString* fAllVarsI; ///< what to do with variables
186
187 // relevant for all methods
188 Int_t fNpar; ///< number of parameters in fit (default: 2*Nvar)
189 Double_t fEffRef; ///< reference efficiency
190 std::vector<Int_t>* fRangeSign; ///< used to match cuts to fit parameters (and vice versa)
191 TRandom* fRandom; ///< random generator for MC optimisation method
192
193 // basic statistics
194 std::vector<Double_t>* fMeanS; ///< means of variables (signal)
195 std::vector<Double_t>* fMeanB; ///< means of variables (background)
196 std::vector<Double_t>* fRmsS; ///< RMSs of variables (signal)
197 std::vector<Double_t>* fRmsB; ///< RMSs of variables (background)
198
199 TH1* fEffBvsSLocal; ///< intermediate eff. background versus eff signal histo
200
201 // PDF section
202 std::vector<TH1*>* fVarHistS; ///< reference histograms (signal)
203 std::vector<TH1*>* fVarHistB; ///< reference histograms (background)
204 std::vector<TH1*>* fVarHistS_smooth; ///< smoothed reference histograms (signal)
205 std::vector<TH1*>* fVarHistB_smooth; ///< smoothed reference histograms (background)
206 std::vector<PDF*>* fVarPdfS; ///< reference PDFs (signal)
207 std::vector<PDF*>* fVarPdfB; ///< reference PDFs (background)
208
209 // negative efficiencies
210 Bool_t fNegEffWarning; ///< flag raised in case of negative efficiency warning
211
212
213 // the definition of fit parameters can be different from the actual
214 // cut requirements; these functions provide the matching
215 void MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* );
217
218 void MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* );
219 void MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin );
220
221 // creates PDFs in case these are used to compute efficiencies
222 // (corresponds to: EffMethod == kUsePDFs)
223 void CreateVariablePDFs( void );
224
225 // returns signal and background efficiencies for given cuts - using event counting
228 // returns signal and background efficiencies for given cuts - using PDFs
231
232 // default initialisation method called by all constructors
233 void Init( void ) override;
234
235 ClassDefOverride(MethodCuts,0); // Multivariate optimisation of signal efficiency
236 };
237
238} // namespace TMVA
239
240#endif
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
#define ClassDefOverride(name, id)
Definition Rtypes.h:348
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
A simple Binary search tree including a volume search method.
Class that contains all the data information.
Definition DataSetInfo.h:62
Interface for a fitter 'target'.
Virtual base Class for all MVA method.
Definition MethodBase.h:111
friend class MethodCuts
Definition MethodBase.h:603
void ReadWeightsFromStream(std::istream &) override=0
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular ...
Definition MethodCuts.h:61
TRandom * fRandom
random generator for MC optimisation method
Definition MethodCuts.h:191
void CheckSetup() override
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition MethodCuts.h:130
Double_t EstimatorFunction(std::vector< Double_t > &) override
returns estimator for "cut fitness" used by GA
Double_t fEffRef
reference efficiency
Definition MethodCuts.h:189
TString fFitMethodS
chosen fit method (string)
Definition MethodCuts.h:164
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA.
const Ranking * CreateRanking() override
Definition MethodCuts.h:121
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
BinarySearchTree * fBinaryTreeS
Definition MethodCuts.h:177
void AddWeightsXMLTo(void *parent) const override
create XML description for LD classification and regression (for arbitrary number of output classes/t...
void SetTestSignalEfficiency(Double_t effS)
Definition MethodCuts.h:113
std::vector< Int_t > * fRangeSign
used to match cuts to fit parameters (and vice versa)
Definition MethodCuts.h:190
Int_t fNpar
number of parameters in fit (default: 2*Nvar)
Definition MethodCuts.h:188
TString fEffMethodS
chosen efficiency calculation method (string)
Definition MethodCuts.h:166
EFitMethodType fFitMethod
chosen fit method
Definition MethodCuts.h:165
Double_t GetRarity(Double_t, Types::ESBType) const override
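compute rarity: \( R(x) = \int_{-\infty}^{x} \mathrm{PDF}(x')\,dx' \), where PDF(x) is the PDF of the classifier's signal or background distribution (the MethodCuts override simply returns 0)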
Definition MethodCuts.h:105
Bool_t fNegEffWarning
flag raised in case of negative efficiency warning
Definition MethodCuts.h:210
void DeclareOptions() override
define the options (their key words) that can be set in the option string.
void Train(void) override
training method: here the cuts are optimised for the training sample
Double_t fEffSMin
used to test optimized signal efficiency
Definition MethodCuts.h:170
Double_t * fCutRangeMax
maximum of allowed cut range
Definition MethodCuts.h:173
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
Double_t GetTrainingEfficiency(const TString &) override
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
void ProcessOptions() override
process user options.
static const Double_t fgMaxAbsCutVal
Definition MethodCuts.h:127
std::vector< TH1 * > * fVarHistB
reference histograms (background)
Definition MethodCuts.h:203
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
Cuts can only handle classification with 2 classes.
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
std::vector< PDF * > * fVarPdfB
reference PDFs (background)
Definition MethodCuts.h:207
EEffMethod fEffMethod
chosen efficiency calculation method
Definition MethodCuts.h:167
Double_t * fTmpCutMin
temporary minimum requirement
Definition MethodCuts.h:183
Double_t ** fCutMin
minimum requirement
Definition MethodCuts.h:181
void ReadWeightsFromStream(std::istream &i) override
read the cuts from stream
std::vector< TH1 * > * fVarHistS
reference histograms (signal)
Definition MethodCuts.h:202
std::vector< Double_t > * fRmsB
RMSs of variables (background)
Definition MethodCuts.h:197
Double_t fEffSMax
used to test optimized signal efficiency
Definition MethodCuts.h:171
std::vector< TH1 * > * fVarHistB_smooth
smoothed reference histograms (background)
Definition MethodCuts.h:205
Double_t GetmuTransform(TTree *)
Definition MethodCuts.h:100
std::vector< PDF * > * fVarPdfS
reference PDFs (signal)
Definition MethodCuts.h:206
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample
void TestClassification() override
nothing to test
void WriteMonitoringHistosToFile(void) const override
write histograms and PDFs to file for monitoring purposes
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
virtual ~MethodCuts(void)
destructor
Double_t GetSeparation(TH1 *, TH1 *) const override
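compute "separation" defined as \( \langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)}\,dx \) (not computed for cuts: the MethodCuts override returns -1)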
Definition MethodCuts.h:97
Double_t * fCutRangeMin
minimum of allowed cut range
Definition MethodCuts.h:172
Double_t GetSeparation(PDF *=nullptr, PDF *=nullptr) const override
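compute "separation" defined as \( \langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)}\,dx \) (not computed for cuts: the MethodCuts override returns -1)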
Definition MethodCuts.h:98
Double_t GetSignificance(void) const override
compute significance of mean difference
Definition MethodCuts.h:99
BinarySearchTree * fBinaryTreeB
Definition MethodCuts.h:178
std::vector< Double_t > * fRmsS
RMSs of variables (signal)
Definition MethodCuts.h:196
std::vector< Double_t > * fMeanS
means of variables (signal)
Definition MethodCuts.h:194
std::vector< Double_t > * fMeanB
means of variables (background)
Definition MethodCuts.h:195
TString * fAllVarsI
what to do with variables
Definition MethodCuts.h:185
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &) override
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
std::vector< EFitParameters > * fFitParams
vector for series of fit methods
Definition MethodCuts.h:168
Double_t fTestSignalEff
used to test optimized signal efficiency
Definition MethodCuts.h:169
std::vector< Interval * > fCutRange
allowed ranges for cut optimisation
Definition MethodCuts.h:174
Double_t * fTmpCutMax
temporary maximum requirement
Definition MethodCuts.h:184
std::vector< TH1 * > * fVarHistS_smooth
smoothed reference histograms (signal)
Definition MethodCuts.h:204
void Init(void) override
default initialisation called by all constructors
void MatchParsToCuts(Double_t *, Double_t *, Double_t *)
Double_t ** fCutMax
maximum requirement
Definition MethodCuts.h:182
TH1 * fEffBvsSLocal
intermediate eff. background versus eff signal histo
Definition MethodCuts.h:199
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
void ReadWeightsFromXML(void *wghtnode) override
read coefficients from xml weight file
void GetHelpMessage() const override
get help message text
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
void PrintCuts(Double_t effS) const
print cuts
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
Ranking for variables in method (implementation)
Definition Ranking.h:48
This is the base class for the ROOT Random number generators.
Definition TRandom.h:27
Basic string class.
Definition TString.h:138
A TTree represents a columnar dataset.
Definition TTree.h:89
create variable transformations