ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
MethodCuts.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodCuts *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Multivariate optimisation of signal efficiency for given background *
12  * efficiency, using rectangular minimum and maximum requirements on *
13  * input variables *
14  * *
15  * Authors (alphabetical): *
16  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
18  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * LAPP, Annecy, France *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #ifndef ROOT_TMVA_MethodCuts
34 #define ROOT_TMVA_MethodCuts
35 
36 //////////////////////////////////////////////////////////////////////////
37 // //
38 // MethodCuts //
39 // //
40 // Multivariate optimisation of signal efficiency for given background //
41 // efficiency, using rectangular minimum and maximum requirements on //
42 // input variables //
43 // //
44 //////////////////////////////////////////////////////////////////////////
45 
46 #include <vector>
47 #include <map>
48 
49 #ifndef ROOT_TMVA_MethodBase
50 #include "TMVA/MethodBase.h"
51 #endif
52 #ifndef ROOT_TMVA_BinarySearchTree
53 #include "TMVA/BinarySearchTree.h"
54 #endif
55 #ifndef ROOT_TMVA_PDF
56 #include "TMVA/PDF.h"
57 #endif
58 #ifndef ROOT_TMVA_TMatrixDfwd
59 #ifndef ROOT_TMatrixDfwd
60 #include "TMatrixDfwd.h"
61 #endif
62 #endif
63 #ifndef ROOT_TMVA_IFitterTarget
64 #ifndef ROOT_IFitterTarget
65 #include "IFitterTarget.h"
66 #endif
67 #endif
68 
69 class TRandom;
70 
71 namespace TMVA {
72 
73  class Interval;
74 
75  class MethodCuts : public MethodBase, public IFitterTarget { // optimises rectangular min/max cuts on the input variables
76 
77  public:
78 
79  MethodCuts( const TString& jobName,
80  const TString& methodTitle,
81  DataSetInfo& theData,
82  const TString& theOption = "MC:150:10000:",
83  TDirectory* theTargetFile = 0 ); // constructor taking job name, method title, data set info and option string
84 
85  MethodCuts( DataSetInfo& theData,
86  const TString& theWeightFile,
87  TDirectory* theTargetDir = NULL ); // constructor taking a weight file name instead of an option string
88 
89  // this is a workaround which is necessary since CINT is not capable of handling dynamic casts;
90  static MethodCuts* DynamicCast( IMethod* method ) { return dynamic_cast<MethodCuts*>(method); } // result is null if 'method' is not a MethodCuts
91 
92  virtual ~MethodCuts( void ); // destructor
93 
94  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets ); // cuts can only handle classification with 2 classes
95 
96  // training method: here the cuts are optimised for the training sample
97  void Train( void );
98 
100 
101  void AddWeightsXMLTo ( void* parent ) const; // create the XML description below 'parent'
102 
103  void ReadWeightsFromStream( std::istream & i ); // read the cuts from stream
104  void ReadWeightsFromXML ( void* wghtnode ); // read coefficients from xml weight file
105 
106  // calculate the MVA value; for cuts this is the cut evaluation: 1.0 if event passed, 0.0 otherwise
107  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
108 
109  // write method specific histograms (and PDFs) to target file for monitoring purposes
110  void WriteMonitoringHistosToFile( void ) const;
111 
112  // test the method (nothing to test for cuts)
113  void TestClassification();
114 
115  // also overridden --> not computed for cuts (each of these simply returns -1)
116  Double_t GetSeparation ( TH1*, TH1* ) const { return -1; }
117  Double_t GetSeparation ( PDF* = 0, PDF* = 0 ) const { return -1; }
118  Double_t GetSignificance( void ) const { return -1; }
119  Double_t GetmuTransform ( TTree *) { return -1; }
122 
123  // rarity distributions (signal or background (default) is uniform in [0,1]); always returns 0 here
124  Double_t GetRarity( Double_t, Types::ESBType ) const { return 0; }
125 
126  // accessors for Minuit; returns the estimator for "cut fitness"
127  Double_t ComputeEstimator( std::vector<Double_t> & );
128 
129  Double_t EstimatorFunction( std::vector<Double_t> & ); // estimator for "cut fitness" used by GA
130  Double_t EstimatorFunction( Int_t ievt1, Int_t ievt2 );
131 
133 
134  // retrieve (or print) cut values for given signal efficiency
135  void PrintCuts( Double_t effS ) const;
136  Double_t GetCuts ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
137  Double_t GetCuts ( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const;
138 
139  // ranking of input variables (not available for cuts, hence the null return)
140  const Ranking* CreateRanking() { return 0; }
141 
142  void DeclareOptions(); // define the options (their key words) that can be set in the option string
143  void ProcessOptions(); // process user options
144 
145  // maximum |cut| value
146  static const Double_t fgMaxAbsCutVal;
147 
148  // no check of options at this place (intentionally empty)
149  void CheckSetup() {}
150 
151  protected:
152 
153  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
154  void MakeClassSpecific( std::ostream&, const TString& ) const;
155 
156  // get help message text
157  void GetHelpMessage() const;
158 
159  private:
160 
161  // optimisation method
168 
169  // efficiency calculation method
170  // - kUseEventSelection: computes efficiencies from given data sample
171  // - kUsePDFs : creates smoothed PDFs from data samples, and
172  // uses this to compute efficiencies
175 
176  // improve the Monte Carlo by providing some additional information
181 
182  // general
183  TString fFitMethodS; // chosen fit method (string)
184  EFitMethodType fFitMethod; // chosen fit method
185  TString fEffMethodS; // chosen efficiency calculation method (string)
186  EEffMethod fEffMethod; // chosen efficiency calculation method
187  std::vector<EFitParameters>* fFitParams; // vector for series of fit methods
188  Double_t fTestSignalEff; // used to test optimized signal efficiency
189  Double_t fEffSMin; // presumably lower bound on the signal efficiency -- TODO confirm (comment was a copy of fTestSignalEff's)
190  Double_t fEffSMax; // presumably upper bound on the signal efficiency -- TODO confirm (comment was a copy of fTestSignalEff's)
191  Double_t* fCutRangeMin; // minimum of allowed cut range
192  Double_t* fCutRangeMax; // maximum of allowed cut range
193  std::vector<Interval*> fCutRange; // allowed ranges for cut optimisation
194 
195  // for the use of the binary tree method
198 
199  // MC method
200  Double_t** fCutMin; // minimum requirement
201  Double_t** fCutMax; // maximum requirement
202  Double_t* fTmpCutMin; // temporary minimum requirement
203  Double_t* fTmpCutMax; // temporary maximum requirement
204  TString* fAllVarsI; // what to do with variables
205 
206  // relevant for all methods
207  Int_t fNpar; // number of parameters in fit (default: 2*Nvar)
208  Double_t fEffRef; // reference efficiency
209  std::vector<Int_t>* fRangeSign; // used to match cuts to fit parameters (and vice versa)
210  TRandom* fRandom; // random generator for MC optimisation method
211 
212  // basic statistics
213  std::vector<Double_t>* fMeanS; // means of variables (signal)
214  std::vector<Double_t>* fMeanB; // means of variables (background)
215  std::vector<Double_t>* fRmsS; // RMSs of variables (signal)
216  std::vector<Double_t>* fRmsB; // RMSs of variables (background)
217 
218  TH1* fEffBvsSLocal; // intermediate eff. background versus eff signal histo
219 
220  // PDF section
221  std::vector<TH1*>* fVarHistS; // reference histograms (signal)
222  std::vector<TH1*>* fVarHistB; // reference histograms (background)
223  std::vector<TH1*>* fVarHistS_smooth; // smoothed reference histograms (signal)
224  std::vector<TH1*>* fVarHistB_smooth; // smoothed reference histograms (background)
225  std::vector<PDF*>* fVarPdfS; // reference PDFs (signal)
226  std::vector<PDF*>* fVarPdfB; // reference PDFs (background)
227 
228  // negative efficiencies
229  Bool_t fNegEffWarning; // flag raised in case of negative efficiency warning
230 
231 
232  // the definition of fit parameters can be different from the actual
233  // cut requirements; these functions provide the matching
234  void MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* ); // translates parameters into cuts
236 
237  void MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* ); // translates cuts into parameters
238  void MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin );
239 
240  // creates PDFs in case these are used to compute efficiencies
241  // (corresponds to: EffMethod == kUsePDFs)
242  void CreateVariablePDFs( void );
243 
244  // returns signal and background efficiencies for given cuts - using event counting
245  void GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
246  Double_t& effS, Double_t& effB );
247  // returns signal and background efficiencies for given cuts - using PDFs
248  void GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
249  Double_t& effS, Double_t& effB );
250 
251  // default initialisation method called by all constructors
252  void Init( void );
253 
254  ClassDef(MethodCuts,0) // Multivariate optimisation of signal efficiency
255  };
256 
257 } // namespace TMVA
258 
259 #endif
std::vector< Double_t > * fRmsS
Definition: MethodCuts.h:215
EEffMethod fEffMethod
Definition: MethodCuts.h:186
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
Definition: MethodCuts.cxx:442
Double_t GetSignificance(void) const
compute significance of mean difference: $\mathrm{significance} = |\langle S \rangle - \langle B \rangle| / \sqrt{\mathrm{RMS}_S^2 + \mathrm{RMS}_B^2}$ ...
Definition: MethodCuts.h:118
TString fEffMethodS
Definition: MethodCuts.h:185
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA; there are two requirements: 1) the signal efficiency m...
Definition: MethodCuts.cxx:888
void TestClassification()
nothing to test
Definition: MethodCuts.cxx:824
Double_t fTestSignalEff
Definition: MethodCuts.h:188
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodCuts.h:149
void SetTestSignalEfficiency(Double_t effS)
Definition: MethodCuts.h:132
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
Definition: MethodCuts.cxx:969
Double_t * fTmpCutMin
Definition: MethodCuts.h:202
EAnalysisType
Definition: Types.h:124
Double_t * fCutRangeMax
Definition: MethodCuts.h:192
std::vector< TH1 * > * fVarHistS
Definition: MethodCuts.h:221
Basic string class.
Definition: TString.h:137
const Ranking * CreateRanking()
Definition: MethodCuts.h:140
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
std::vector< PDF * > * fVarPdfS
Definition: MethodCuts.h:225
Double_t GetRarity(Double_t, Types::ESBType) const
compute rarity: R(x) = Integrate_[-oo..x] { PDF(x') dx' } where PDF(x) is the PDF of the classifier's...
Definition: MethodCuts.h:124
std::vector< TH1 * > * fVarHistS_smooth
Definition: MethodCuts.h:223
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
Definition: MethodCuts.cxx:875
std::vector< Double_t > * fMeanB
Definition: MethodCuts.h:214
#define ClassDef(name, id)
Definition: Rtypes.h:254
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:29
std::vector< EFitParameters > * fFitParams
Definition: MethodCuts.h:187
void Init(void)
default initialisation called by all constructors
Definition: MethodCuts.cxx:232
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
BinarySearchTree * fBinaryTreeS
Definition: MethodCuts.h:196
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
Definition: MethodCuts.cxx:561
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample ...
std::vector< TH1 * > * fVarHistB_smooth
Definition: MethodCuts.h:224
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
Definition: PDF.h:71
std::vector< PDF * > * fVarPdfB
Definition: MethodCuts.h:226
void Train(void)
training method: here the cuts are optimised for the training sample
Definition: MethodCuts.cxx:588
std::vector< Int_t > * fRangeSign
Definition: MethodCuts.h:209
MethodCuts(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="MC:150:10000:", TDirectory *theTargetFile=0)
void PrintCuts(Double_t effS) const
print cuts
Definition: MethodCuts.cxx:475
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fEffRef
Definition: MethodCuts.h:208
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
void DeclareOptions()
define the options (their key words) that can be set in the option string; known options: Method <strin...
Definition: MethodCuts.cxx:330
Double_t ** fCutMin
Definition: MethodCuts.h:200
Double_t * fCutRangeMin
Definition: MethodCuts.h:191
Double_t fEffSMax
Definition: MethodCuts.h:190
TRandom * fRandom
Definition: MethodCuts.h:210
Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as $\langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)} \, dx$ ...
Definition: MethodCuts.h:116
EFitMethodType fFitMethod
Definition: MethodCuts.h:184
double Double_t
Definition: RtypesCore.h:55
std::vector< Double_t > * fMeanS
Definition: MethodCuts.h:213
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
Bool_t fNegEffWarning
Definition: MethodCuts.h:229
Describe directory structure in memory.
Definition: TDirectory.h:44
int type
Definition: TGX11.cxx:120
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
The TH1 histogram class.
Definition: TH1.h:80
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
TString fFitMethodS
Definition: MethodCuts.h:183
std::vector< Double_t > * fRmsB
Definition: MethodCuts.h:216
void GetHelpMessage() const
get help message text
virtual ~MethodCuts(void)
destructor
Definition: MethodCuts.cxx:282
static const Double_t fgMaxAbsCutVal
Definition: MethodCuts.h:146
void ProcessOptions()
process user options; sanity check: do not allow the input variables to be normalised, because this only creates problems when interpreting the cuts
Definition: MethodCuts.cxx:373
Double_t ** fCutMax
Definition: MethodCuts.h:201
#define NULL
Definition: Rtypes.h:82
A TTree object has a header with a name and a title.
Definition: TTree.h:98
Double_t GetmuTransform(TTree *)
Definition: MethodCuts.h:119
Double_t * fTmpCutMax
Definition: MethodCuts.h:203
TString * fAllVarsI
Definition: MethodCuts.h:204
Double_t GetTrainingEfficiency(const TString &)
virtual void ReadWeightsFromStream(std::istream &)=0
Double_t GetSeparation(PDF *=0, PDF *=0) const
compute "separation" defined as $\langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)} \, dx$ ...
Definition: MethodCuts.h:117
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
Definition: MethodCuts.cxx:223
std::vector< Interval * > fCutRange
Definition: MethodCuts.h:193
static MethodCuts * DynamicCast(IMethod *method)
Definition: MethodCuts.h:90
BinarySearchTree * fBinaryTreeB
Definition: MethodCuts.h:197
Double_t fEffSMin
Definition: MethodCuts.h:189
std::vector< TH1 * > * fVarHistB
Definition: MethodCuts.h:222