Logo ROOT   6.10/09
Reference Guide
MethodCuts.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodCuts *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Multivariate optimisation of signal efficiency for given background *
12  * efficiency, using rectangular minimum and maximum requirements on *
13  * input variables *
14  * *
15  * Authors (alphabetical): *
16  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
18  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * LAPP, Annecy, France *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #ifndef ROOT_TMVA_MethodCuts
34 #define ROOT_TMVA_MethodCuts
35 
36 //////////////////////////////////////////////////////////////////////////
37 // //
38 // MethodCuts //
39 // //
40 // Multivariate optimisation of signal efficiency for given background //
41 // efficiency, using rectangular minimum and maximum requirements on //
42 // input variables //
43 // //
44 //////////////////////////////////////////////////////////////////////////
45 
46 #include <vector>
47 #include <map>
48 
49 #include "TMVA/MethodBase.h"
50 #include "TMVA/BinarySearchTree.h"
51 #include "TMVA/PDF.h"
52 #include "TMatrixDfwd.h"
53 #include "IFitterTarget.h"
54 
55 class TRandom;
56 
57 namespace TMVA {
58 
59  class Interval;
60 
61  class MethodCuts : public MethodBase, public IFitterTarget { // optimises rectangular min/max requirements (cuts) on the input variables
62 
63  public:
64 
65  MethodCuts( const TString& jobName,
66  const TString& methodTitle,
67  DataSetInfo& theData,
68  const TString& theOption = "MC:150:10000:");
69 
70  MethodCuts( DataSetInfo& theData,
71  const TString& theWeightFile);
72 
73  // workaround, needed because CINT cannot handle dynamic casts: returns 0 if "method" is not a MethodCuts
74  static MethodCuts* DynamicCast( IMethod* method ) { return dynamic_cast<MethodCuts*>(method); }
75 
76  virtual ~MethodCuts( void );
77 
78  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
79 
80  // training method: the cuts are optimised for the training sample
81  void Train( void );
82 
84 
85  void AddWeightsXMLTo ( void* parent ) const;
86 
87  void ReadWeightsFromStream( std::istream & i );
88  void ReadWeightsFromXML ( void* wghtnode );
89 
90  // cut evaluation: returns 1.0 if the event passed, 0.0 otherwise (not a continuous MVA value)
91  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
92 
93  // write method-specific histograms (and PDFs) to the target file for monitoring purposes
94  void WriteMonitoringHistosToFile( void ) const;
95 
96  // test the method (nothing to test for cuts)
97  void TestClassification();
98 
99  // the following quantities are not computed for cuts: the functions are overridden to return -1
100  Double_t GetSeparation ( TH1*, TH1* ) const { return -1; }
101  Double_t GetSeparation ( PDF* = 0, PDF* = 0 ) const { return -1; }
102  Double_t GetSignificance( void ) const { return -1; }
103  Double_t GetmuTransform ( TTree *) { return -1; }
106 
107  // rarity distributions (signal or background (default) is uniform in [0,1]); for cuts this always returns 0
108  Double_t GetRarity( Double_t, Types::ESBType ) const { return 0; }
109 
110  // estimator of the "cut fitness", called by the fitters (accessors for Minuit; also used by the GA)
111  Double_t ComputeEstimator( std::vector<Double_t> & );
112 
113  Double_t EstimatorFunction( std::vector<Double_t> & );
114  Double_t EstimatorFunction( Int_t ievt1, Int_t ievt2 );
115 
117 
118  // print / retrieve the cut values for a given signal efficiency effS
119  void PrintCuts( Double_t effS ) const;
120  Double_t GetCuts ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
121  Double_t GetCuts ( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const;
122 
123  // ranking of input variables (not available for cuts: always returns 0)
124  const Ranking* CreateRanking() { return 0; }
125 
126  void DeclareOptions();
127  void ProcessOptions();
128 
129  // maximum |cut| value
130  static const Double_t fgMaxAbsCutVal;
131 
132  // no check of options at this place (intentionally empty)
133  void CheckSetup() {}
134 
135  protected:
136 
137  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
138  void MakeClassSpecific( std::ostream&, const TString& ) const;
139 
140  // get help message text
141  void GetHelpMessage() const;
142 
143  private:
144 
145  // optimisation method
152 
153  // efficiency calculation method
154  // - kUseEventSelection: computes efficiencies from given data sample
155  // - kUsePDFs : creates smoothed PDFs from data samples, and
156  // uses this to compute efficiencies
159 
160  // improve the Monte Carlo by providing some additional information
165 
166  // general
167  TString fFitMethodS; // chosen fit method (string)
168  EFitMethodType fFitMethod; // chosen fit method
169  TString fEffMethodS; // chosen efficiency calculation method (string)
170  EEffMethod fEffMethod; // chosen efficiency calculation method
171  std::vector<EFitParameters>* fFitParams; // vector for series of fit methods
172  Double_t fTestSignalEff; // used to test optimized signal efficiency
173  Double_t fEffSMin; // used to test optimized signal efficiency (presumably the lower signal-efficiency bound - TODO confirm)
174  Double_t fEffSMax; // used to test optimized signal efficiency (presumably the upper signal-efficiency bound - TODO confirm)
175  Double_t* fCutRangeMin; // minimum of allowed cut range
176  Double_t* fCutRangeMax; // maximum of allowed cut range
177  std::vector<Interval*> fCutRange; // allowed ranges for cut optimisation
178 
179  // for the use of the binary tree method
182 
183  // MC method
184  Double_t** fCutMin; // minimum requirement (two-level array; index order not visible here - TODO confirm)
185  Double_t** fCutMax; // maximum requirement (two-level array; index order not visible here - TODO confirm)
186  Double_t* fTmpCutMin; // temporary minimum requirement
187  Double_t* fTmpCutMax; // temporary maximum requirement
188  TString* fAllVarsI; // what to do with variables
189 
190  // relevant for all methods
191  Int_t fNpar; // number of parameters in fit (default: 2*Nvar)
192  Double_t fEffRef; // reference efficiency
193  std::vector<Int_t>* fRangeSign; // used to match cuts to fit parameters (and vice versa)
194  TRandom* fRandom; // random generator for MC optimisation method
195 
196  // basic statistics
197  std::vector<Double_t>* fMeanS; // means of variables (signal)
198  std::vector<Double_t>* fMeanB; // means of variables (background)
199  std::vector<Double_t>* fRmsS; // RMSs of variables (signal)
200  std::vector<Double_t>* fRmsB; // RMSs of variables (background)
201 
202  TH1* fEffBvsSLocal; // intermediate histogram: background efficiency versus signal efficiency
203 
204  // PDF section
205  std::vector<TH1*>* fVarHistS; // reference histograms (signal)
206  std::vector<TH1*>* fVarHistB; // reference histograms (background)
207  std::vector<TH1*>* fVarHistS_smooth; // smoothed reference histograms (signal)
208  std::vector<TH1*>* fVarHistB_smooth; // smoothed reference histograms (background)
209  std::vector<PDF*>* fVarPdfS; // reference PDFs (signal)
210  std::vector<PDF*>* fVarPdfB; // reference PDFs (background)
211 
212  // negative efficiencies
213  Bool_t fNegEffWarning; // flag raised in case of negative efficiency warning
214 
215 
216  // the definition of fit parameters can be different from the actual
217  // cut requirements; these functions provide the matching
218  void MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* ); // translates fit parameters into cuts
220 
221  void MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* ); // translates cuts into fit parameters
222  void MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin ); // same, reading the cuts at index ibin (presumably an efficiency bin - TODO confirm)
223 
224  // creates PDFs in case these are used to compute efficiencies
225  // (corresponds to: EffMethod == kUsePDFs)
226  void CreateVariablePDFs( void );
227 
228  // returns signal and background efficiencies for given cuts - using event counting
229  void GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
230  Double_t& effS, Double_t& effB );
231  // returns signal and background efficiencies for given cuts - using PDFs
232  void GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
233  Double_t& effS, Double_t& effB );
234 
235  // default initialisation method called by all constructors
236  void Init( void );
237 
238  ClassDef(MethodCuts,0); // Multivariate optimisation of signal efficiency
239  };
240 
241 } // namespace TMVA
242 
243 #endif
std::vector< Double_t > * fRmsS
Definition: MethodCuts.h:199
EEffMethod fEffMethod
Definition: MethodCuts.h:170
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
Definition: MethodCuts.cxx:432
void GetHelpMessage() const
get help message text
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
TString fEffMethodS
Definition: MethodCuts.h:169
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA.
Definition: MethodCuts.cxx:893
void TestClassification()
nothing to test
Definition: MethodCuts.cxx:827
Double_t GetSignificance(void) const
compute significance of mean difference
Definition: MethodCuts.h:102
MethodCuts(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="MC:150:10000:")
standard constructor
Definition: MethodCuts.cxx:129
Double_t fTestSignalEff
Definition: MethodCuts.h:172
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodCuts.h:133
void SetTestSignalEfficiency(Double_t effS)
Definition: MethodCuts.h:116
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
Definition: MethodCuts.cxx:974
Double_t * fTmpCutMin
Definition: MethodCuts.h:186
EAnalysisType
Definition: Types.h:125
Virtual base class for all MVA methods.
Definition: MethodBase.h:106
Double_t * fCutRangeMax
Definition: MethodCuts.h:176
std::vector< TH1 * > * fVarHistS
Definition: MethodCuts.h:205
Basic string class.
Definition: TString.h:129
const Ranking * CreateRanking()
Definition: MethodCuts.h:124
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
Definition: MethodCuts.cxx:551
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< PDF * > * fVarPdfS
Definition: MethodCuts.h:209
std::vector< TH1 * > * fVarHistS_smooth
Definition: MethodCuts.h:207
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
Definition: MethodCuts.cxx:878
std::vector< Double_t > * fMeanB
Definition: MethodCuts.h:198
#define ClassDef(name, id)
Definition: Rtypes.h:297
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:27
std::vector< EFitParameters > * fFitParams
Definition: MethodCuts.h:171
void Init(void)
default initialisation called by all constructors
Definition: MethodCuts.cxx:220
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
BinarySearchTree * fBinaryTreeS
Definition: MethodCuts.h:180
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample ...
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
std::vector< TH1 * > * fVarHistB_smooth
Definition: MethodCuts.h:208
Class that contains all the data information.
Definition: DataSetInfo.h:60
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
std::vector< PDF * > * fVarPdfB
Definition: MethodCuts.h:210
void Train(void)
training method: here the cuts are optimised for the training sample
Definition: MethodCuts.cxx:578
std::vector< Int_t > * fRangeSign
Definition: MethodCuts.h:193
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fEffRef
Definition: MethodCuts.h:192
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
void DeclareOptions()
define the options (their key words) that can be set in the option string.
Definition: MethodCuts.cxx:319
Double_t ** fCutMin
Definition: MethodCuts.h:184
Double_t * fCutRangeMin
Definition: MethodCuts.h:175
Double_t fEffSMax
Definition: MethodCuts.h:174
TRandom * fRandom
Definition: MethodCuts.h:194
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular ...
Definition: MethodCuts.h:61
EFitMethodType fFitMethod
Definition: MethodCuts.h:168
double Double_t
Definition: RtypesCore.h:55
std::vector< Double_t > * fMeanS
Definition: MethodCuts.h:197
Bool_t fNegEffWarning
Definition: MethodCuts.h:213
int type
Definition: TGX11.cxx:120
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
The TH1 histogram class.
Definition: TH1.h:56
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
TString fFitMethodS
Definition: MethodCuts.h:167
std::vector< Double_t > * fRmsB
Definition: MethodCuts.h:200
Double_t GetRarity(Double_t, Types::ESBType) const
compute rarity: where PDF(x) is the PDF of the classifier's signal or background distribution ...
Definition: MethodCuts.h:108
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
Abstract ClassifierFactory template that handles arbitrary types.
virtual ~MethodCuts(void)
destructor
Definition: MethodCuts.cxx:270
Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
Definition: MethodCuts.h:100
static const Double_t fgMaxAbsCutVal
Definition: MethodCuts.h:130
void ProcessOptions()
process user options.
Definition: MethodCuts.cxx:363
Double_t ** fCutMax
Definition: MethodCuts.h:185
void PrintCuts(Double_t effS) const
print cuts
Definition: MethodCuts.cxx:465
A TTree object has a header with a name and a title.
Definition: TTree.h:78
Double_t GetSeparation(PDF *=0, PDF *=0) const
compute "separation" defined as
Definition: MethodCuts.h:101
Double_t GetmuTransform(TTree *)
Definition: MethodCuts.h:103
Double_t * fTmpCutMax
Definition: MethodCuts.h:187
Interface for a fitter 'target'.
Definition: IFitterTarget.h:44
TString * fAllVarsI
Definition: MethodCuts.h:188
Double_t GetTrainingEfficiency(const TString &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
virtual void ReadWeightsFromStream(std::istream &)=0
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
Definition: MethodCuts.cxx:211
std::vector< Interval * > fCutRange
Definition: MethodCuts.h:177
static MethodCuts * DynamicCast(IMethod *method)
Definition: MethodCuts.h:74
BinarySearchTree * fBinaryTreeB
Definition: MethodCuts.h:181
Double_t fEffSMin
Definition: MethodCuts.h:173
A simple Binary search tree including a volume search method.
std::vector< TH1 * > * fVarHistB
Definition: MethodCuts.h:206