ROOT  6.06/09
Reference Guide
MethodPDERS.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodPDERS *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Multidimensional Likelihood using the "Probability density estimator *
12  * range search" (PDERS) method suggested in *
13  * T. Carli and B. Koblitz, NIM A 501, 576 (2003) *
14  * *
15  * The multidimensional PDFs for signal and background are modeled *
16  * by counting the events in the "vicinity" of a test point. The volume *
17  * that describes "vicinity" is user-defined through the option string. *
18  * A search method based on binary-trees is used to improve the selection *
19  * efficiency of the volume search. *
20  * *
21  * Authors (alphabetical): *
22  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
23  * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
24  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
25  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
26  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
27  * *
28  * Copyright (c) 2005: *
29  * CERN, Switzerland *
30  * U. of Victoria, Canada *
31  * MPI-K Heidelberg, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 #ifndef ROOT_TMVA_MethodPDERS
39 #define ROOT_TMVA_MethodPDERS
40 
41 //////////////////////////////////////////////////////////////////////////
42 // //
43 // MethodPDERS //
44 // //
45 // Multidimensional Likelihood using the "Probability density //
46 // estimator range search" (PDERS) method //
47 // //
48 //////////////////////////////////////////////////////////////////////////
49 
50 #ifndef ROOT_TMVA_MethodBase
51 #include "TMVA/MethodBase.h"
52 #endif
53 #ifndef ROOT_TMVA_BinarySearchTree
54 #include "TMVA/BinarySearchTree.h"
55 #endif
56 #ifndef ROOT_TMVA_TVector
57 #ifndef ROOT_TVector
58 #include "TVector.h"
59 #endif
60 #endif
61 
62 namespace TMVA {
63 
64  class Volume;
65  class Event;
66 
67  class MethodPDERS : public MethodBase {
    // Multidimensional likelihood using the "Probability density estimator
    // range search" (PDERS) method: the signal and background PDFs are modeled
    // by counting the events in the "vicinity" of a test point, with a
    // binary-tree based search used for the volume search (see file header).
    // NOTE(review): this is a line-numbered listing; gaps in the embedded
    // numbers mark lines of the original header that are absent here.
68 
69  public:
70 
    // constructor taking job name, method title, dataset info and option string
71  MethodPDERS( const TString& jobName,
72  const TString& methodTitle,
73  DataSetInfo& theData,
74  const TString& theOption,
75  TDirectory* theTargetDir = 0 );
76 
    // constructor taking dataset info and a weight file
77  MethodPDERS( DataSetInfo& theData,
78  const TString& theWeightFile,
79  TDirectory* theTargetDir = NULL );
80 
    // destructor
81  virtual ~MethodPDERS( void );
82 
    // PDERS can handle classification with 2 classes and regression with
    // one or more regression targets
83  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
84 
85 
86  // training method
87  void Train( void );
88 
89  // write weights to file
90  void WriteWeightsToStream( TFile& rf ) const;
91  void AddWeightsXMLTo( void* parent ) const;
92 
93  // read weights from file
94  void ReadWeightsFromStream( std::istream& istr );
95  void ReadWeightsFromStream( TFile& istr );
96  void ReadWeightsFromXML( void* wghtnode );
97 
98  // calculate the MVA value
99  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
100 
101  // get the regression output values
102  const std::vector<Float_t>& GetRegressionValues();
103  public:
104 
105  // for root finder (NOTE(review): lines 106-107 are absent from this listing)
108 
109  // static accessor returning a pointer to this object
110  static MethodPDERS* ThisPDERS( void );
111 
112  protected:
113 
114  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
115  void MakeClassSpecific( std::ostream&, const TString& ) const;
116 
117  // get help message text
118  void GetHelpMessage() const;
119 
120  Volume* fHelpVolume; // auxiliary variable
121  Int_t fFcnCall; // number of external function calls (RootFinder)
122 
123  // accessors
124  BinarySearchTree* GetBinaryTree( void ) const { return fBinaryTree; }
125 
    // kernel estimates over the sampled tree nodes inside a volume
    // (presumably C = classification, R = regression -- TODO confirm)
126  Double_t CKernelEstimate( const Event&, std::vector<const BinarySearchTreeNode*>&, Volume& );
127  void RKernelEstimate( const Event&, std::vector<const BinarySearchTreeNode*>&, Volume&, std::vector<Float_t> *pdfSum );
128 
    // from the normalized Euclidean distance calculate the value for a certain kernel
129  Double_t ApplyKernelFunction( Double_t normalized_distance );
    // normalized distance between a base event and a sample event (Euclidean metric)
131  Double_t GetNormalizedDistance( const TMVA::Event &base_event,
132  const BinarySearchTreeNode &sample_event,
133  Double_t *dim_normalization);
136 
137  // ranking of input variables (none provided here: returns a null pointer)
138  const Ranking* CreateRanking() { return 0; }
139 
140  private:
141 
142  // the option handling methods
143  void DeclareOptions();
144  void ProcessOptions();
145 
146  // calculate the averages of the input variables needed for adaptive training
147  void CalcAverages();
148 
149  // create binary search trees for signal and background (NOTE(review): the declaration on line 150 is absent from this listing)
151 
152  // get sample of training events
153  void GetSample( const Event &e, std::vector<const BinarySearchTreeNode*>& events, Volume *volume);
154 
155  // options
156  TString fVolumeRange; // option volume range
157  TString fKernelString; // option kernel estimator
158 
    // NOTE(review): most lines of the two member enums (EVolumeRangeMode for
    // fVRangeMode, EKernelEstimator for fKernelEstimator) are absent from this
    // listing; only the fragments below remain.
166  } fVRangeMode;
167 
169  kBox = 0,
173  kSinc3, // the sinc enumerators must be consecutive and in order!
184 
185  BinarySearchTree* fBinaryTree; // binary tree
186 
187  std::vector<Float_t>* fDelta; // size of volume
188  std::vector<Float_t>* fShift; // volume center
189  std::vector<Float_t> fAverageRMS; // average RMS of signal and background
190 
191  Float_t fScaleS; // weight for signal events
192  Float_t fScaleB; // weight for background events
193  Float_t fDeltaFrac; // fraction of RMS
194  Double_t fGaussSigma; // size of Gauss in adaptive volume
195  Double_t fGaussSigmaNorm;// size of Gauss in adaptive volume (normalised to dimensions)
196 
197  Double_t fNRegOut; // number of output dimensions for regression
198 
199  // input for adaptive volume adjustment
200  Float_t fNEventsMin; // minimum number of events in adaptive volume
201  Float_t fNEventsMax; // maximum number of events in adaptive volume
202  Float_t fMaxVIterations;// maximum number of iterations to adapt volume size
203  Float_t fInitialScale; // initial scale for adaptive volume
204 
205  Bool_t fInitializedVolumeEle; // is volume element initialized ?
206 
207  Int_t fkNNMin; // min number of events in kNN tree
208  Int_t fkNNMax; // max number of events in kNN tree
209 
210  Double_t fMax_distance; // maximum distance
211  Bool_t fPrinted; // print
212  Bool_t fNormTree; // binary-search tree is normalised
213 
    // defines volume dimensions
214  void SetVolumeElement ( void );
215 
216  Double_t CRScalc ( const Event& );
217  void RRScalc ( const Event&, std::vector<Float_t>* count );
218 
    // statistical error estimate for RS estimator
219  Float_t GetError ( Float_t countS, Float_t countB,
220  Float_t sumW2S, Float_t sumW2B ) const;
221 
222  // This is a workaround for OSx where static thread_local data members are
223  // not supported: the pointer lives in a function-local TTHREAD_TLS variable and is returned by reference.
224  static MethodPDERS*& GetMethodPDERSThreadLocal() {TTHREAD_TLS(MethodPDERS*) fgThisPDERS(nullptr); return fgThisPDERS;};
    // update static this pointer
225  void UpdateThis();
226 
    // default initialisation routine called by all constructors
227  void Init( void );
228 
229  ClassDef(MethodPDERS,0) // Multi-dimensional probability density estimator range search (PDERS) method
230  };
231 
232 } // namespace TMVA
233 
234 #endif // ROOT_TMVA_MethodPDERS
void UpdateThis()
update static this pointer
virtual ~MethodPDERS(void)
destructor
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use the Euclidean metric here; it might not be the best or most efficient choice.
std::vector< Float_t > * fShift
Definition: MethodPDERS.h:188
Float_t fMaxVIterations
Definition: MethodPDERS.h:202
float Float_t
Definition: RtypesCore.h:53
BinarySearchTree * fBinaryTree
Definition: MethodPDERS.h:185
Double_t NormSinc(Double_t x)
NormSinc.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:45
EAnalysisType
Definition: Types.h:124
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Float_t fInitialScale
Definition: MethodPDERS.h:203
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
init the size of a volume element using a defined fraction of the volume containing the entire events...
Double_t fMax_distance
Definition: MethodPDERS.h:210
void Init(void)
default initialisation routine called by all constructors
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
Double_t x[n]
Definition: legend1.C:17
#define ClassDef(name, id)
Definition: Rtypes.h:254
void ReadWeightsFromStream(std::istream &istr)
read weight info from file
TString fVolumeRange
Definition: MethodPDERS.h:156
void GetHelpMessage() const
get help message text
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
Double_t CRScalc(const Event &)
TString fKernelString
Definition: MethodPDERS.h:157
void DeclareOptions()
define the options (their key words) that can be set in the option string; known options: VolumeRangeMo...
Double_t fGaussSigma
Definition: MethodPDERS.h:194
void SetVolumeElement(void)
defines volume dimensions
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
Bool_t fInitializedVolumeEle
Definition: MethodPDERS.h:205
unsigned int UInt_t
Definition: RtypesCore.h:42
BinarySearchTree * GetBinaryTree(void) const
Definition: MethodPDERS.h:124
void ReadWeightsFromXML(void *wghtnode)
static MethodPDERS * ThisPDERS(void)
static pointer to this object
void Train(void)
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
Volume * fHelpVolume
Definition: MethodPDERS.h:120
std::vector< Float_t > * fDelta
Definition: MethodPDERS.h:187
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:41
enum TMVA::MethodPDERS::EKernelEstimator fKernelEstimator
int type
Definition: TGX11.cxx:120
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
std::vector< Float_t > fAverageRMS
Definition: MethodPDERS.h:189
const std::vector< Float_t > & GetRegressionValues()
Abstract ClassifierFactory template that handles arbitrary types.
void AddWeightsXMLTo(void *parent) const
write weights to xml file
enum TMVA::MethodPDERS::EVolumeRangeMode fVRangeMode
void ProcessOptions()
process the options specified by the user
#define NULL
Definition: Rtypes.h:82
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode * > &events, Volume *volume)
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
const Ranking * CreateRanking()
Definition: MethodPDERS.h:138
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption, TDirectory *theTargetDir=0)
static MethodPDERS *& GetMethodPDERSThreadLocal()
Definition: MethodPDERS.h:224
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel ...
Double_t fGaussSigmaNorm
Definition: MethodPDERS.h:195
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
void RRScalc(const Event &, std::vector< Float_t > *count)
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point. ...