Logo ROOT   6.12/07
Reference Guide
MethodMLP.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Jan Therhaag, Jiahang Zhong
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodMLP *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * ANN Multilayer Perceptron class for the discrimination of signal *
12  * from background. BFGS implementation based on TMultiLayerPerceptron *
13  * class from ROOT (http://root.cern.ch). *
14  * *
15  * Authors (alphabetical): *
16  * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19  * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20  * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25  * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 #ifndef ROOT_TMVA_MethodMLP
39 #define ROOT_TMVA_MethodMLP
40 
41 //////////////////////////////////////////////////////////////////////////
42 // //
43 // MethodMLP //
44 // //
45 // Multilayer Perceptron built off of MethodANNBase //
46 // //
47 //////////////////////////////////////////////////////////////////////////
48 
49 #include <vector>
50 #include "TString.h"
51 #include "TTree.h"
52 #include "TObjArray.h"
53 #include "TRandom3.h"
54 #include "TH1F.h"
55 #include "TMatrixDfwd.h"
56 
57 #include "TMVA/IFitterTarget.h"
58 #include "TMVA/MethodBase.h"
59 #include "TMVA/MethodANNBase.h"
60 #include "TMVA/TNeuron.h"
61 #include "TMVA/TActivation.h"
62 #include "TMVA/ConvergenceTest.h"
63 
64 #define MethodMLP_UseMinuit__ // switch for the Minuit-based code guarded by #ifdef MethodMLP_UseMinuit__
65 #undef MethodMLP_UseMinuit__ // immediately undefined again, so the guarded Minuit sections are compiled out
66 
67 namespace TMVA {
68 
69  class MethodMLP : public MethodANNBase, public IFitterTarget, public ConvergenceTest { // multilayer perceptron built off of MethodANNBase; also acts as a fitter target and convergence test
70 
71  public:
72 
73  // standard constructors (from job name/title/options, or from a weight file)
74  MethodMLP( const TString& jobName,
75  const TString& methodTitle,
76  DataSetInfo& theData,
77  const TString& theOption );
78 
79  MethodMLP( DataSetInfo& theData,
80  const TString& theWeightFile );
81 
82  virtual ~MethodMLP();
83 
84  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets ); // whether the MLP can handle the given analysis type
85 
86  void Train();
87  // for GA: estimator evaluated for a given parameter vector (parameters = weights)
88  Double_t ComputeEstimator ( std::vector<Double_t>& parameters );
89  Double_t EstimatorFunction( std::vector<Double_t>& parameters );
90 
91  enum ETrainingMethod { kBP=0, kBFGS, kGA }; // back propagation, BFGS, genetic algorithm
93 
95  Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper=0 ); // get the mva value generated by the NN
96 
97  protected:
98 
99  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
100  void MakeClassSpecific( std::ostream&, const TString& ) const;
101 
102  // get help message text
103  void GetHelpMessage() const;
104 
105 
106  private:
107 
108  // the option handling methods
109  void DeclareOptions();
110  void ProcessOptions();
111 
112  // general helper functions
113  void Train( Int_t nEpochs );
114  void Init();
115  void InitializeLearningRates(); // initialize learning rates of synapses (only needed by backprop)
116 
117  // used as a measure of success in all minimization techniques
119 
120  // BFGS functions
121  void BFGSMinimize( Int_t nEpochs ); // train network with BFGS algorithm
122  void SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &Buffer );
123  void SteepestDir( TMatrixD &Dir );
124  Bool_t GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta );
125  void SetDir( TMatrixD &Hessian, TMatrixD &Dir );
126  Double_t DerivDir( TMatrixD &Dir );
127  Bool_t LineSearch( TMatrixD &Dir, std::vector<Double_t> &Buffer, Double_t* dError=0 ); //zjh
128  void ComputeDEDw();
129  void SimulateEvent( const Event* ev );
130  void SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha );
131  Double_t GetError();
132  Double_t GetMSEErr( const Event* ev, UInt_t index = 0 ); //zjh
133  Double_t GetCEErr( const Event* ev, UInt_t index = 0 ); //zjh
134 
135  // backpropagation functions
136  void BackPropagationMinimize( Int_t nEpochs ); // minimize estimator / train network with back propagation
137  void TrainOneEpoch(); // train network over a single epoch/cycle of events
138  void Shuffle( Int_t* index, Int_t n );
139  void DecaySynapseWeights(Bool_t lateEpoch );
140  void TrainOneEvent( Int_t ievt); // train network over a single event
141  Double_t GetDesiredOutput( const Event* ev ); // get the desired output of this event
142  void UpdateNetwork( Double_t desired, Double_t eventWeight=1.0 ); // update the network based on how closely the output matched the desired output
143  void UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight=1.0);
144  void CalculateNeuronDeltas(); // have each neuron calculate its delta by back propagation
145  void UpdateSynapses(); // update synapse error fields and adjust the weights (if in sequential mode)
146  void AdjustSynapseWeights(); // just adjust the synapse weights (should be called in batch mode)
147 
148  // faster backpropagation
149  void TrainOneEventFast( Int_t ievt, Float_t*& branchVar, Int_t& type );
150 
151  // genetic algorithm functions
152  void GeneticMinimize();
153 
154 
155 #ifdef MethodMLP_UseMinuit__
156  // minuit functions -- compiled out (MethodMLP_UseMinuit__ is not defined) because they rely on a static pointer
157  void MinuitMinimize();
158  static MethodMLP* GetThisPtr();
159  static void IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
160  void FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
161 #endif
162 
163  // general
164  bool fUseRegulator; // zjh
165  bool fCalculateErrors; // compute inverse hessian matrix at the end of the training
166  Double_t fPrior; // zjh
167  std::vector<Double_t> fPriorDev; // zjh
168  void GetApproxInvHessian ( TMatrixD& InvHessian, bool regulate=true ); //rank-1 approximation, neglect 2nd derivatives. //zjh
169  void UpdateRegulators(); // zjh
170  void UpdatePriors(); // zjh
172 
173  ETrainingMethod fTrainingMethod; // method of training: BP, BFGS or GA (see ETrainingMethod)
174  TString fTrainMethodS; // training method option param
175 
176  Float_t fSamplingFraction; // fraction of events which is sampled for training
177  Float_t fSamplingEpoch; // fraction of epochs where sampling is used
178  Float_t fSamplingWeight; // changing factor for event weights when sampling is turned on
179  Bool_t fSamplingTraining; // The training sample is sampled
180  Bool_t fSamplingTesting; // The testing sample is sampled
181 
182  // BFGS variables
183  Double_t fLastAlpha; // line search variable
184  Double_t fTau; // line search variable
185  Int_t fResetStep; // reset time (how often we clear hessian matrix)
186 
187  // back propagation variable
188  Double_t fLearnRate; // learning rate for synapse weight adjustments
189  Double_t fDecayRate; // decay rate for above learning rate
190  EBPTrainingMode fBPMode; // backprop learning mode (sequential or batch)
191  TString fBpModeS; // backprop learning mode option string (sequential or batch)
192  Int_t fBatchSize; // batch size, only matters if in batch learning mode
193  Int_t fTestRate; // test for overtraining is performed every #th epoch
194  Bool_t fEpochMon; // create and fill epoch-wise monitoring histograms (makes outputfile big!)
195 
196  // genetic algorithm variables
197  Int_t fGA_nsteps; // GA settings: number of steps
198  Int_t fGA_preCalc; // GA settings: number of pre-calc steps
199  Int_t fGA_SC_steps; // GA settings: SC_steps
200  Int_t fGA_SC_rate; // GA settings: SC_rate
201  Double_t fGA_SC_factor; // GA settings: SC_factor
202 
203  // regression, storage of deviations
204  std::vector<std::pair<Float_t,Float_t> >* fDeviationsFromTargets; // pairs of (deviation from the targets, event weight)
205 
206  Float_t fWeightRange; // suppress outliers for the estimator calculation
207 
208 #ifdef MethodMLP_UseMinuit__
209  // minuit variables -- compiled out (MethodMLP_UseMinuit__ is not defined) because they rely on a static pointer
210  Int_t fNumberOfWeights; // Minuit: number of weights
211  static MethodMLP* fgThis; // Minuit: this pointer
212 #endif
213 
214  // debugging flags
215  static const Int_t fgPRINT_ESTIMATOR_INC = 10; // debug flags
216  static const Bool_t fgPRINT_SEQ = kFALSE; // debug flags
217  static const Bool_t fgPRINT_BATCH = kFALSE; // debug flags
218 
219  ClassDef(MethodMLP,0); // Multi-layer perceptron implemented specifically for TMVA
220  };
221 
222 } // namespace TMVA
223 
224 #endif
Float_t fSamplingFraction
Definition: MethodMLP.h:176
void GetHelpMessage() const
get help message text
Definition: MethodMLP.cxx:1719
Double_t GetMSEErr(const Event *ev, UInt_t index=0)
Definition: MethodMLP.cxx:1007
virtual ~MethodMLP()
Destructor; nothing to be done.
Definition: MethodMLP.cxx:140
void Init()
default initializations
Definition: MethodMLP.cxx:166
Double_t fLastAlpha
Definition: MethodMLP.h:183
static const Bool_t fgPRINT_SEQ
Definition: MethodMLP.h:216
Double_t GetError()
Definition: MethodMLP.cxx:970
float Float_t
Definition: RtypesCore.h:53
Int_t fGA_SC_rate
Definition: MethodMLP.h:200
Bool_t fSamplingTesting
Definition: MethodMLP.h:180
Int_t fResetStep
Definition: MethodMLP.h:185
void TrainOneEpoch()
train network over a single epoch/cycle of events
Definition: MethodMLP.cxx:1146
TString fTrainMethodS
Definition: MethodMLP.h:174
EAnalysisType
Definition: Types.h:125
void SteepestDir(TMatrixD &Dir)
Definition: MethodMLP.cxx:778
void SetDir(TMatrixD &Hessian, TMatrixD &Dir)
Definition: MethodMLP.cxx:812
Double_t fDecayRate
Definition: MethodMLP.h:189
Basic string class.
Definition: TString.h:125
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void TrainOneEventFast(Int_t ievt, Float_t *&branchVar, Int_t &type)
fast per-event training
Definition: MethodMLP.cxx:1226
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodMLP.cxx:197
Int_t fGA_nsteps
Definition: MethodMLP.h:197
Double_t CalculateEstimator(Types::ETreeType treeType=Types::kTraining, Int_t iEpoch=-1)
calculate the estimator that training is attempting to minimize
Definition: MethodMLP.cxx:294
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Definition: TMath.cxx:352
void BFGSMinimize(Int_t nEpochs)
train network with BFGS algorithm
Definition: MethodMLP.cxx:491
bool HasInverseHessian()
Definition: MethodMLP.h:94
Int_t fGA_preCalc
Definition: MethodMLP.h:198
Float_t fSamplingEpoch
Definition: MethodMLP.h:177
void SetDirWeights(std::vector< Double_t > &Origin, TMatrixD &Dir, Double_t alpha)
Definition: MethodMLP.cxx:954
ETrainingMethod fTrainingMethod
Definition: MethodMLP.h:173
void Shuffle(Int_t *index, Int_t n)
Input:
Definition: MethodMLP.cxx:1194
#define ClassDef(name, id)
Definition: Rtypes.h:320
void GetApproxInvHessian(TMatrixD &InvHessian, bool regulate=true)
Definition: MethodMLP.cxx:1512
EBPTrainingMode fBPMode
Definition: MethodMLP.h:190
Double_t fGA_SC_factor
Definition: MethodMLP.h:201
void ComputeDEDw()
Definition: MethodMLP.cxx:701
bool fUseRegulator
Definition: MethodMLP.h:164
Class that contains all the data information.
Definition: DataSetInfo.h:60
void TrainOneEvent(Int_t ievt)
train network over a single event; this uses the new event model
Definition: MethodMLP.cxx:1262
Bool_t fEpochMon
Definition: MethodMLP.h:194
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Definition: MethodMLP.cxx:1708
Multilayer Perceptron class built off of MethodANNBase.
Definition: MethodMLP.h:69
Int_t fUpdateLimit
Definition: MethodMLP.h:171
void UpdateSynapses()
update synapse error fields and adjust the weights (if in sequential mode)
Definition: MethodMLP.cxx:1416
std::vector< Double_t > fPriorDev
Definition: MethodMLP.h:167
Double_t GetCEErr(const Event *ev, UInt_t index=0)
Definition: MethodMLP.cxx:1024
void GeneticMinimize()
create genetics class similar to GeneticCut; give it a vector of parameter ranges (parameters = weights)...
Definition: MethodMLP.cxx:1360
void InitializeLearningRates()
initialize learning rates of synapses, used only by back propagation
Definition: MethodMLP.cxx:280
void DecaySynapseWeights(Bool_t lateEpoch)
decay synapse weights in last 10 epochs, lower learning rate even more to find a good minimum ...
Definition: MethodMLP.cxx:1212
Double_t GetDesiredOutput(const Event *ev)
get the desired output of this event
Definition: MethodMLP.cxx:1281
Bool_t GetHessian(TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta)
Definition: MethodMLP.cxx:791
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
get the mva value generated by the NN
Definition: MethodMLP.cxx:1553
void BackPropagationMinimize(Int_t nEpochs)
minimize estimator / train network with back propagation algorithm
Definition: MethodMLP.cxx:1041
Float_t fWeightRange
Definition: MethodMLP.h:206
unsigned int UInt_t
Definition: RtypesCore.h:42
static const Int_t fgPRINT_ESTIMATOR_INC
Definition: MethodMLP.h:215
Bool_t LineSearch(TMatrixD &Dir, std::vector< Double_t > &Buffer, Double_t *dError=0)
Definition: MethodMLP.cxx:844
void UpdateNetwork(Double_t desired, Double_t eventWeight=1.0)
update the network based on how closely the output matched the desired output
Definition: MethodMLP.cxx:1290
static const Bool_t fgPRINT_BATCH
Definition: MethodMLP.h:217
TString fBpModeS
Definition: MethodMLP.h:191
const Bool_t kFALSE
Definition: RtypesCore.h:88
void SetGammaDelta(TMatrixD &Gamma, TMatrixD &Delta, std::vector< Double_t > &Buffer)
Definition: MethodMLP.cxx:676
void CalculateNeuronDeltas()
have each neuron calculate its delta by back propagation
Definition: MethodMLP.cxx:1332
double Double_t
Definition: RtypesCore.h:55
int type
Definition: TGX11.cxx:120
Double_t fLearnRate
Definition: MethodMLP.h:188
void SimulateEvent(const Event *ev)
Definition: MethodMLP.cxx:738
Double_t fPrior
Definition: MethodMLP.h:166
void AdjustSynapseWeights()
just adjust the synapse weights (should be called in batch mode)
Definition: MethodMLP.cxx:1438
Double_t fTau
Definition: MethodMLP.h:184
Int_t fBatchSize
Definition: MethodMLP.h:192
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
MLP can handle classification with 2 classes and regression with one regression-target.
Definition: MethodMLP.cxx:154
Abstract ClassifierFactory template that handles arbitrary types.
void ProcessOptions()
process user options
Definition: MethodMLP.cxx:249
Float_t fSamplingWeight
Definition: MethodMLP.h:178
bool fCalculateErrors
Definition: MethodMLP.h:165
Check for convergence.
Double_t EstimatorFunction(std::vector< Double_t > &parameters)
interface to the estimate
Definition: MethodMLP.cxx:1389
Double_t ComputeEstimator(std::vector< Double_t > &parameters)
this function is called by GeneticANN for GA optimization
Definition: MethodMLP.cxx:1397
std::vector< std::pair< Float_t, Float_t > > * fDeviationsFromTargets
Definition: MethodMLP.h:204
Interface for a fitter 'target'.
Definition: IFitterTarget.h:44
Double_t DerivDir(TMatrixD &Dir)
Definition: MethodMLP.cxx:829
void UpdateRegulators()
Definition: MethodMLP.cxx:1472
Int_t fGA_SC_steps
Definition: MethodMLP.h:199
Base class for all TMVA methods using artificial neural networks.
Definition: MethodANNBase.h:62
const Int_t n
Definition: legend1.C:16
MethodMLP(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor
Definition: MethodMLP.cxx:92
Bool_t fSamplingTraining
Definition: MethodMLP.h:179