Logo ROOT  
Reference Guide
MethodFDA.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodFDA *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Function discriminant analysis (FDA). This simple classifier *
12 * fits any user-defined TFormula (via option configuration string) to *
13 * the training data by requiring a formula response of 1 (0) to signal *
14 * (background) events. The parameter fitting is done via the abstract *
15 * class FitterBase, featuring Monte Carlo sampling, Genetic *
16 * Algorithm, Simulated Annealing, MINUIT and combinations of these. *
17 * *
18 * Authors (alphabetical): *
19 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
20 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
21 * *
22 * Copyright (c) 2005-2010: *
23 * CERN, Switzerland *
24 * MPI-K Heidelberg, Germany *
25 * *
26 * Redistribution and use in source and binary forms, with or without *
27 * modification, are permitted according to the terms listed in LICENSE *
28 * (http://tmva.sourceforge.net/LICENSE) *
29 **********************************************************************************/
30
31#ifndef ROOT_TMVA_MethodFDA
32#define ROOT_TMVA_MethodFDA
33
34//////////////////////////////////////////////////////////////////////////
35// //
36// MethodFDA //
37// //
38// Function discriminant analysis (FDA). This simple classifier //
39// fits any user-defined TFormula (via option configuration string) to //
40// the training data by requiring a formula response of 1 (0) to signal //
41// (background) events. The parameter fitting is done via the abstract //
42// class FitterBase, featuring Monte Carlo sampling, Genetic //
43// Algorithm, Simulated Annealing, MINUIT and combinations of these. //
44// //
45// Can compute one-dimensional regression //
46// //
47//////////////////////////////////////////////////////////////////////////
48
49#include "TMVA/MethodBase.h"
50#include "TMVA/IFitterTarget.h"
51#include <vector>
52
53class TFormula;
54
55namespace TMVA {
56
57 class Interval;
58 class Event;
59 class FitterBase;
60
// MethodFDA: function discriminant analysis (FDA). Fits a user-defined TFormula
// (given via the option configuration string) to the training data, requiring a
// formula response of 1 for signal and 0 for background events. Inherits from
// MethodBase and from IFitterTarget (the interface for a fitter 'target').
61 class MethodFDA : public MethodBase, public IFitterTarget {
62
63 public:
64
   // standard constructor; theOption is the option configuration string
   // (defaults to an empty string)
65 MethodFDA( const TString& jobName,
66 const TString& methodTitle,
67 DataSetInfo& theData,
68 const TString& theOption = "");
69
   // constructor taking a weight-file name
   // NOTE(review): presumably used to restore a trained method from file -- confirm in MethodFDA.cxx
70 MethodFDA( DataSetInfo& theData,
71 const TString& theWeightFile);
72
   // destructor
73 virtual ~MethodFDA( void );
74
   // tells whether the given analysis type is supported; FDA handles classification
   // with 2 classes and regression with one regression target
75 Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
76
77 // training method (FDA training)
78 void Train( void );
79
81
   // create the XML description of the trained method below the node 'parent'
82 void AddWeightsXMLTo ( void* parent ) const;
83
   // read back the training results from a stream / read the coefficients from an XML weight file
84 void ReadWeightsFromStream( std::istream & i );
85 void ReadWeightsFromXML ( void* wghtnode );
86
87 // calculate the MVA value for the given event
   // err and errUpper are optional pointers that default to null
   // NOTE(review): whether they are ever filled is not visible in this header
88 Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
89
   // regression and multiclass outputs, returned by const reference
90 virtual const std::vector<Float_t>& GetRegressionValues();
91 virtual const std::vector<Float_t>& GetMulticlassValues();
92
   // default initialisation
93 void Init( void );
94
95 // ranking of input variables: none is provided, a null pointer is always returned
96 const Ranking* CreateRanking() { return 0; }
97
   // compute the estimator for the given parameter set (the quantity to be minimised)
98 Double_t EstimatorFunction( std::vector<Double_t>& );
99
100 // no check of options at this place (intentionally empty)
101 void CheckSetup() {}
102
103 protected:
104
105 // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
106 void MakeClassSpecific( std::ostream&, const TString& ) const;
107
108 // get help message text
109 void GetHelpMessage() const;
110
111 private:
112
113 // compute multiclass values
    // NOTE(review): 'values' looks like the output argument and 'parameters' the fit parameters -- confirm in MethodFDA.cxx
114 void CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values);
115
116
117 // create and interpret formula expression and compute estimator
    // CreateFormula translates the formula string into a TFormula and the parameter
    // string into parameter ranges; InterpretFormula takes an event together with an
    // iterator range [begin, end) over parameter values
118 void CreateFormula ();
119 Double_t InterpretFormula( const Event*, std::vector<Double_t>::iterator begin, std::vector<Double_t>::iterator end );
120
121 // clean up: delete and clear all class members
122 void ClearAll();
123
124 // print fit results
125 void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
126
127 // the option handling methods
    // DeclareOptions defines the option keywords that can be set in the option string;
    // ProcessOptions decodes the option string
128 void DeclareOptions();
129 void ProcessOptions();
130
    // NOTE(review): the 'P' and 'T' variants carry identical descriptions; presumably
    // 'P' is the string as provided and 'T' the translated one -- confirm in MethodFDA.cxx
131 TString fFormulaStringP; // string with function
132 TString fParRangeStringP; // string with ranges of parameters
133 TString fFormulaStringT; // string with function
134 TString fParRangeStringT; // string with ranges of parameters
135
136 TFormula* fFormula; // the discrimination function
137 UInt_t fNPars; // number of parameters
138 std::vector<Interval*> fParRange; // ranges of parameters
139 std::vector<Double_t> fBestPars; // the pars that optimise (minimise) the estimator
140 TString fFitMethod; // estimator optimisation method
141 TString fConverger; // fitmethod uses fConverger as intermediate step to converge into local minima
142 FitterBase* fFitter; // the fitter used in the training
143 IFitterTarget* fConvergerFitter; // intermediate fitter
144
145
146 // sum of weights (this should become centrally available through the dataset)
147 Double_t fSumOfWeightsSig; // sum of weights (signal)
148 Double_t fSumOfWeightsBkg; // sum of weights (background)
149 Double_t fSumOfWeights; // sum of weights
150
151 //
152 Int_t fOutputDimensions; // number of output values
153
154 ClassDef(MethodFDA,0); // Function Discriminant Analysis
155 };
156
157} // namespace TMVA
158
159#endif // ROOT_TMVA_MethodFDA
int Int_t
Definition: RtypesCore.h:45
unsigned int UInt_t
Definition: RtypesCore.h:46
bool Bool_t
Definition: RtypesCore.h:63
double Double_t
Definition: RtypesCore.h:59
#define ClassDef(name, id)
Definition: Rtypes.h:325
int type
Definition: TGX11.cxx:121
The Formula class.
Definition: TFormula.h:87
Class that contains all the data information.
Definition: DataSetInfo.h:62
Base class for TMVA fitters.
Definition: FitterBase.h:51
Interface for a fitter 'target'.
Definition: IFitterTarget.h:44
Virtual base class for all MVA methods.
Definition: MethodBase.h:111
virtual void ReadWeightsFromStream(std::istream &)=0
Function discriminant analysis (FDA).
Definition: MethodFDA.h:61
void Train(void)
FDA training.
Definition: MethodFDA.cxx:363
TString fFormulaStringT
Definition: MethodFDA.h:133
void AddWeightsXMLTo(void *parent) const
create XML description for FDA classification and regression (for arbitrary number of output classes/t...
Definition: MethodFDA.cxx:619
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised)
Definition: MethodFDA.cxx:435
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodFDA.h:101
virtual ~MethodFDA(void)
destructor
Definition: MethodFDA.cxx:326
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Definition: MethodFDA.cxx:499
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodFDA.cxx:334
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
Definition: MethodFDA.cxx:637
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
Definition: MethodFDA.cxx:581
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Definition: MethodFDA.cxx:604
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodFDA.cxx:553
Double_t fSumOfWeightsBkg
Definition: MethodFDA.h:148
Int_t fOutputDimensions
Definition: MethodFDA.h:152
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Definition: MethodFDA.cxx:86
void Init(void)
default initialisation
Definition: MethodFDA.cxx:124
void ClearAll()
delete and clear all class members
Definition: MethodFDA.cxx:346
std::vector< Interval * > fParRange
Definition: MethodFDA.h:138
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Definition: MethodFDA.cxx:421
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
Definition: MethodFDA.cxx:676
Double_t fSumOfWeightsSig
Definition: MethodFDA.h:147
TString fParRangeStringP
Definition: MethodFDA.h:132
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodFDA.cxx:518
TFormula * fFormula
Definition: MethodFDA.h:136
const Ranking * CreateRanking()
Definition: MethodFDA.h:96
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodFDA.cxx:530
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodFDA.cxx:240
std::vector< Double_t > fBestPars
Definition: MethodFDA.h:139
IFitterTarget * fConvergerFitter
Definition: MethodFDA.h:143
FitterBase * fFitter
Definition: MethodFDA.h:142
Double_t fSumOfWeights
Definition: MethodFDA.h:149
TString fParRangeStringT
Definition: MethodFDA.h:134
TString fFitMethod
Definition: MethodFDA.h:140
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
Definition: MethodFDA.cxx:183
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodFDA.cxx:163
TString fConverger
Definition: MethodFDA.h:141
void GetHelpMessage() const
get help message text
Definition: MethodFDA.cxx:721
TString fFormulaStringP
Definition: MethodFDA.h:131
Ranking for variables in method (implementation)
Definition: Ranking.h:48
EAnalysisType
Definition: Types.h:128
Basic string class.
Definition: TString.h:136
create variable transformations