// @(#)root/tmva $Id$
// Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Jan Therhaag, Jiahang Zhong

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodMLP                                                             *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      ANN Multilayer Perceptron class for the discrimination of signal          *
 *      from background. BFGS implementation based on TMultiLayerPerceptron       *
 *      class from ROOT (http://root.cern.ch).                                    *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Krzysztof Danielowski <danielow@cern.ch>       - IFJ & AGH, Poland        *
 *      Andreas Hoecker       <Andreas.Hocker@cern.ch> - CERN, Switzerland        *
 *      Matt Jachowski        <jachowski@stanford.edu> - Stanford University, USA *
 *      Kamil Kraszewski      <kalq@cern.ch>           - IFJ & UJ, Poland         *
 *      Maciej Kruk           <mkruk@cern.ch>          - IFJ & AGH, Poland        *
 *      Peter Speckmayer      <peter.speckmayer@cern.ch> - CERN, Switzerland      *
 *      Joerg Stelzer         <stelzer@cern.ch>        - DESY, Germany            *
 *      Jan Therhaag          <Jan.Therhaag@cern.ch>   - U of Bonn, Germany       *
 *      Eckhard v. Toerne     <evt@uni-bonn.de>        - U of Bonn, Germany       *
 *      Jiahang Zhong         <Jiahang.Zhong@cern.ch>  - Academia Sinica, Taipei  *
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef ROOT_TMVA_MethodMLP
#define ROOT_TMVA_MethodMLP

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// MethodMLP                                                            //
//                                                                      //
// Multilayer Perceptron built off of MethodANNBase                     //
//                                                                      //
//////////////////////////////////////////////////////////////////////////
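
// Illustrative usage sketch (not from the original header): an MLP is booked
// through the TMVA Factory, and the TrainingMethod option selects among the
// minimizers declared below (BP, BFGS, GA). The option keys follow the usual
// TMVA conventions, but the exact Factory interface varies between ROOT
// versions, so treat this as a sketch rather than authoritative code:
//
//    TFile* outputFile = TFile::Open( "TMVA.root", "RECREATE" );
//    TMVA::Factory factory( "TMVAClassification", outputFile,
//                           "AnalysisType=Classification" );
//    // ... declare input variables and signal/background trees here ...
//    factory.BookMethod( TMVA::Types::kMLP, "MLP",
//                        "NCycles=600:HiddenLayers=N+5:TrainingMethod=BP" );
//    factory.TrainAllMethods();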

#include <vector>
#ifndef ROOT_TString
#include "TString.h"
#endif
#ifndef ROOT_TTree
#include "TTree.h"
#endif
#ifndef ROOT_TObjArray
#include "TObjArray.h"
#endif
#ifndef ROOT_TRandom3
#include "TRandom3.h"
#endif
#ifndef ROOT_TH1F
#include "TH1F.h"
#endif
#ifndef ROOT_TMatrixDfwd
#include "TMatrixDfwd.h"
#endif

#ifndef ROOT_TMVA_IFitterTarget
#include "TMVA/IFitterTarget.h"
#endif
#ifndef ROOT_TMVA_MethodBase
#include "TMVA/MethodBase.h"
#endif
#ifndef ROOT_TMVA_MethodANNBase
#include "TMVA/MethodANNBase.h"
#endif
#ifndef ROOT_TMVA_TNeuron
#include "TMVA/TNeuron.h"
#endif
#ifndef ROOT_TMVA_TActivation
#include "TMVA/TActivation.h"
#endif
#ifndef ROOT_TMVA_ConvergenceTest
#include "TMVA/ConvergenceTest.h"
#endif

#define MethodMLP_UseMinuit__
#undef  MethodMLP_UseMinuit__
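// Note: defining MethodMLP_UseMinuit__ and then immediately undefining it
// leaves the Minuit-based minimizer disabled by default; to experiment with
// it, remove the #undef so that the code guarded by
// #ifdef MethodMLP_UseMinuit__ below is compiled in.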

namespace TMVA {

   class MethodMLP : public MethodANNBase, public IFitterTarget, public ConvergenceTest {

   public:

      // standard constructors
      MethodMLP( const TString& jobName,
                 const TString& methodTitle,
                 DataSetInfo& theData,
                 const TString& theOption );

      MethodMLP( DataSetInfo& theData,
                 const TString& theWeightFile );

      virtual ~MethodMLP();

      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );

      void Train();
      // for GA
      Double_t ComputeEstimator ( std::vector<Double_t>& parameters );
      Double_t EstimatorFunction( std::vector<Double_t>& parameters );

      enum ETrainingMethod { kBP=0, kBFGS, kGA };
      enum EBPTrainingMode { kSequential=0, kBatch };

      bool     HasInverseHessian() { return fCalculateErrors; }
      Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper=0 );

   protected:

      // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
      void MakeClassSpecific( std::ostream&, const TString& ) const;

      // get help message text
      void GetHelpMessage() const;


   private:

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();

      // general helper functions
      void     Train( Int_t nEpochs );
      void     Init();
      void     InitializeLearningRates(); // although this is only needed by backprop

      // used as a measure of success in all minimization techniques
      Double_t CalculateEstimator( Types::ETreeType treeType = Types::kTraining, Int_t iEpoch = -1 );

      // BFGS functions
      void     BFGSMinimize( Int_t nEpochs );
      void     SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &Buffer );
      void     SteepestDir( TMatrixD &Dir );
      Bool_t   GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta );
      void     SetDir( TMatrixD &Hessian, TMatrixD &Dir );
      Double_t DerivDir( TMatrixD &Dir );
      Bool_t   LineSearch( TMatrixD &Dir, std::vector<Double_t> &Buffer, Double_t* dError=0 ); //zjh
      void     ComputeDEDw();
      void     SimulateEvent( const Event* ev );
      void     SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha );
      Double_t GetError();
      Double_t GetMSEErr( const Event* ev, UInt_t index = 0 ); //zjh
      Double_t GetCEErr( const Event* ev, UInt_t index = 0 ); //zjh
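
      // Sketch of one BFGS iteration as implied by the declarations above
      // (the actual bookkeeping lives in MethodMLP.cxx). With w the synapse
      // weight vector and E the estimator:
      //
      //    Gamma = dE/dw(w_k) - dE/dw(w_k-1)   (gradient difference)
      //    Delta = w_k - w_k-1                 (weight step)
      //
      // GetHessian() folds these into a rank-updated approximation of the
      // inverse Hessian, SetDir() converts it into a search direction
      // Dir = -H^-1 * dE/dw, and LineSearch() minimizes E along Dir (the
      // accepted step length is cached in fLastAlpha). Every fResetStep
      // epochs the approximation is discarded and the search restarts from
      // SteepestDir().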

      // backpropagation functions
      void     BackPropagationMinimize( Int_t nEpochs );
      void     TrainOneEpoch();
      void     Shuffle( Int_t* index, Int_t n );
      void     DecaySynapseWeights( Bool_t lateEpoch );
      void     TrainOneEvent( Int_t ievt );
      Double_t GetDesiredOutput( const Event* ev );
      void     UpdateNetwork( Double_t desired, Double_t eventWeight=1.0 );
      void     UpdateNetwork( const std::vector<Float_t>& desired, Double_t eventWeight=1.0 );
      void     CalculateNeuronDeltas();
      void     UpdateSynapses();
      void     AdjustSynapseWeights();

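      // The backpropagation functions above implement the textbook
      // gradient-descent update; schematically, each synapse weight w_ij
      // receives
      //
      //    dw_ij = - fLearnRate * delta_j * y_i
      //
      // where delta_j is the neuron error from CalculateNeuronDeltas() and
      // y_i the activation feeding the synapse. In sequential mode the
      // weights are adjusted after every event (UpdateSynapses()); in batch
      // mode the corrections are accumulated and applied once per batch via
      // AdjustSynapseWeights(). fLearnRate shrinks during training according
      // to fDecayRate.
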
      // faster backpropagation
      void     TrainOneEventFast( Int_t ievt, Float_t*& branchVar, Int_t& type );

      // genetic algorithm functions
      void GeneticMinimize();
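
      // GeneticMinimize() hands the vector of synapse weights, together with
      // a range for each weight, to TMVA's genetic-algorithm fitter (cf. the
      // IFitterTarget base class); EstimatorFunction() above then acts as the
      // fitness measure that the GA minimizes.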

#ifdef MethodMLP_UseMinuit__
      // minuit functions -- compiled out by default because they rely on a static pointer
      void MinuitMinimize();
      static MethodMLP* GetThisPtr();
      static void IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
      void FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
#endif

      // general
      bool     fUseRegulator;          // zjh
      bool     fCalculateErrors;       // compute inverse hessian matrix at the end of the training
      Double_t fPrior;                 // zjh
      std::vector<Double_t> fPriorDev; // zjh
      void     GetApproxInvHessian( TMatrixD& InvHessian, bool regulate=true ); // rank-1 approximation, neglect 2nd derivatives //zjh
      void     UpdateRegulators();     // zjh
      void     UpdatePriors();         // zjh
      Int_t    fUpdateLimit;           // zjh

      ETrainingMethod fTrainingMethod; // method of training, BP or GA
      TString         fTrainMethodS;   // training method option param

      Float_t fSamplingFraction; // fraction of events which is sampled for training
      Float_t fSamplingEpoch;    // fraction of epochs where sampling is used
      Float_t fSamplingWeight;   // changing factor for event weights when sampling is turned on
      Bool_t  fSamplingTraining; // The training sample is sampled
      Bool_t  fSamplingTesting;  // The testing sample is sampled
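
      // Hypothetical option fragment illustrating the sampling members above
      // (key names follow common TMVA spelling but may differ by version):
      //    "Sampling=0.3:SamplingEpoch=0.8:SamplingImportance=1.0"
      // i.e. train on a random 30% of the events, reweighted by the given
      // importance factor, for the first 80% of the epochs, then switch to
      // the full sample.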

      // BFGS variables
      Double_t fLastAlpha;  // line search variable
      Double_t fTau;        // line search variable
      Int_t    fResetStep;  // reset time (how often we clear hessian matrix)

      // backpropagation variables
      Double_t fLearnRate;  // learning rate for synapse weight adjustments
      Double_t fDecayRate;  // decay rate for above learning rate
      EBPTrainingMode fBPMode; // backprop learning mode (sequential or batch)
      TString  fBpModeS;    // backprop learning mode option string (sequential or batch)
      Int_t    fBatchSize;  // batch size, only matters if in batch learning mode
      Int_t    fTestRate;   // overtraining test performed at every #th epoch
      Bool_t   fEpochMon;   // create and fill epoch-wise monitoring histograms (makes the output file big!)
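
      // Illustrative option string for the backpropagation members above
      // (values reflect commonly used MLP defaults; DeclareOptions() in
      // MethodMLP.cxx is authoritative):
      //    "LearningRate=0.02:DecayRate=0.01:BPMode=sequential:BatchSize=-1:TestRate=10"
      // where BatchSize=-1 conventionally means one batch spanning the whole
      // training sample.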

      // genetic algorithm variables
      Int_t    fGA_nsteps;    // GA settings: number of steps
      Int_t    fGA_preCalc;   // GA settings: number of pre-calc steps
      Int_t    fGA_SC_steps;  // GA settings: SC_steps
      Int_t    fGA_SC_rate;   // GA settings: SC_rate
      Double_t fGA_SC_factor; // GA settings: SC_factor

      // regression, storage of deviations
      std::vector<std::pair<Float_t,Float_t> >* fDeviationsFromTargets; // deviation from the targets, event weight

      Float_t fWeightRange; // suppress outliers for the estimator calculation

#ifdef MethodMLP_UseMinuit__
      // minuit variables -- compiled out by default because they rely on a static pointer
      Int_t fNumberOfWeights;   // Minuit: number of weights
      static MethodMLP* fgThis; // Minuit: this pointer
#endif

      // debugging flags
      static const Int_t  fgPRINT_ESTIMATOR_INC = 10; // debug flags
      static const Bool_t fgPRINT_SEQ = kFALSE;       // debug flags
      static const Bool_t fgPRINT_BATCH = kFALSE;     // debug flags

      ClassDef(MethodMLP,0); // Multi-layer perceptron implemented specifically for TMVA
   };

} // namespace TMVA

#endif