ROOT  6.07/01
Reference Guide
MethodMLP.cxx
1 // @(#)root/tmva $Id$
2 // Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodMLP *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * ANN Multilayer Perceptron class for the discrimination of signal *
12  * from background. BFGS implementation based on TMultiLayerPerceptron *
13  * class from ROOT (http://root.cern.ch). *
14  * *
15  * Authors (alphabetical): *
16  * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19  * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20  * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25  * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 //_______________________________________________________________________
39 //
40 // Multilayer Perceptron class built off of MethodANNBase
41 //_______________________________________________________________________
42 
43 #include "TMVA/MethodMLP.h"
44 
45 #include <vector>
46 #include <cmath>
47 
48 #include "TString.h"
49 #include "TTree.h"
50 #include "Riostream.h"
51 #include "TFitter.h"
52 #include "TMatrixD.h"
53 #include "TMath.h"
54 #include "TFile.h"
55 
56 #include "TMVA/Config.h"
57 #include "TMVA/ClassifierFactory.h"
58 #include "TMVA/DataSet.h"
59 #include "TMVA/DataSetInfo.h"
60 #include "TMVA/FitterBase.h"
61 #include "TMVA/GeneticFitter.h"
62 #include "TMVA/Interval.h"
63 #include "TMVA/MethodANNBase.h"
64 #include "TMVA/MsgLogger.h"
65 #include "TMVA/TNeuron.h"
66 #include "TMVA/TSynapse.h"
67 #include "TMVA/Timer.h"
68 #include "TMVA/Tools.h"
69 #include "TMVA/Types.h"
70 
71 #ifdef MethodMLP_UseMinuit__
72 TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
73 Bool_t MethodMLP_UseMinuit = kTRUE;
74 #endif
75 
76 REGISTER_METHOD(MLP)
77 
78 ClassImp(TMVA::MethodMLP)
79 
80 using std::vector;
81 
82 ////////////////////////////////////////////////////////////////////////////////
83 /// standard constructor
84 
85 TMVA::MethodMLP::MethodMLP( const TString& jobName,
86  const TString& methodTitle,
87  DataSetInfo& theData,
88  const TString& theOption,
89  TDirectory* theTargetDir )
90  : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption, theTargetDir ),
91  fUseRegulator(false), fCalculateErrors(false),
92  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
93  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
94  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
95  fSamplingTraining(false), fSamplingTesting(false),
96  fLastAlpha(0.0), fTau(0.),
97  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
98  fBPMode(kSequential), fBpModeS("None"),
99  fBatchSize(0), fTestRate(0), fEpochMon(false),
100  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
101  fGA_SC_rate(0), fGA_SC_factor(0.0),
102  fDeviationsFromTargets(0),
103  fWeightRange (1.0)
104 {
105 }
106 
107 ////////////////////////////////////////////////////////////////////////////////
108 /// constructor from a weight file
109 
110 TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
111  const TString& theWeightFile,
112  TDirectory* theTargetDir )
113  : MethodANNBase( Types::kMLP, theData, theWeightFile, theTargetDir ),
114  fUseRegulator(false), fCalculateErrors(false),
115  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
116  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
117  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
118  fSamplingTraining(false), fSamplingTesting(false),
119  fLastAlpha(0.0), fTau(0.),
120  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
121  fBPMode(kSequential), fBpModeS("None"),
122  fBatchSize(0), fTestRate(0), fEpochMon(false),
123  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
124  fGA_SC_rate(0), fGA_SC_factor(0.0),
125  fDeviationsFromTargets(0),
126  fWeightRange (1.0)
127 {
128 }
129 
130 ////////////////////////////////////////////////////////////////////////////////
131 /// destructor
132 /// nothing to be done
133 
134 TMVA::MethodMLP::~MethodMLP()
135 {
136 }
137 
138 ////////////////////////////////////////////////////////////////////////////////
139 /// MLP can handle classification with 2 classes, multiclass classification, and regression with one regression-target
140 
141 Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
142 {
143  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
144  if (type == Types::kMulticlass ) return kTRUE;
145  if (type == Types::kRegression ) return kTRUE;
146 
147  return kFALSE;
148 }
149 
150 ////////////////////////////////////////////////////////////////////////////////
151 /// default initializations
152 
153 void TMVA::MethodMLP::Init()
154 {
155  // the minimum requirement to declare an event signal-like
156  SetSignalReferenceCut( 0.5 );
157 #ifdef MethodMLP_UseMinuit__
158  fgThis = this;
159 #endif
160 }
161 
162 ////////////////////////////////////////////////////////////////////////////////
163 /// define the options (their key words) that can be set in the option string
164 /// known options:
165 /// TrainingMethod <string> Training method
166 /// available values are: BP Back-Propagation <default>
167 /// GA Genetic Algorithm (takes a LONG time)
168 ///
169 /// LearningRate <float> NN learning rate parameter
170 /// DecayRate <float> Decay rate for learning parameter
171 /// TestRate <int> Test for overtraining performed at each #th epoch
172 ///
173 /// BPMode <string> Back-propagation learning mode
174 /// available values are: sequential <default>
175 /// batch
176 ///
177 /// BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
178 /// -1 for BatchSize=number_of_events
179 
180 void TMVA::MethodMLP::DeclareOptions()
181 {
182  DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
183  "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
184  AddPreDefVal(TString("BP"));
185  AddPreDefVal(TString("GA"));
186  AddPreDefVal(TString("BFGS"));
187 
188  DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
189  DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
190  DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epoch");
191  DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
192 
193  DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
194  DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
195  DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance","The sampling weights of events in epochs which were successful (worse estimator than before) are multiplied by SamplingImportance, otherwise they are divided.");
196 
197  DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
198  DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
199 
200  DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
201  DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
202 
203  DeclareOptionRef(fBpModeS="sequential", "BPMode",
204  "Back-propagation learning mode: sequential or batch");
205  AddPreDefVal(TString("sequential"));
206  AddPreDefVal(TString("batch"));
207 
208  DeclareOptionRef(fBatchSize=-1, "BatchSize",
209  "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
210 
211  DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
212  "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
213 
214  DeclareOptionRef(fSteps=-1, "ConvergenceTests",
215  "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
216 
217  DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
218  "Use regulator to avoid over-training"); //zjh
219  DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
220  "Maximum times of regulator update"); //zjh
221  DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
222  "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
223 
224  DeclareOptionRef(fWeightRange=1.0, "WeightRange",
225  "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
226 
227 }
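// Editor's usage sketch (not part of the original source): the keywords declared above are
// set through the usual TMVA booking string. A minimal example, assuming the pre-DataLoader
// Factory::BookMethod(Types::EMVA, TString, TString) interface and leaving all other options
// at their defaults (NCycles and HiddenLayers are declared in MethodANNBase):
//
//    factory->BookMethod( TMVA::Types::kMLP, "MLP",
//                         "!H:!V:NCycles=600:HiddenLayers=N+5:TrainingMethod=BFGS:"
//                         "LearningRate=0.02:DecayRate=0.01:TestRate=10:UseRegulator=False" );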
228 
229 ////////////////////////////////////////////////////////////////////////////////
230 /// process user options
231 
232 void TMVA::MethodMLP::ProcessOptions()
233 {
234  MethodANNBase::ProcessOptions();
235 
236 
237  if (IgnoreEventsWithNegWeightsInTraining()) {
238  Log() << kINFO
239  << "Will ignore events with negative weights in training!"
240  << Endl;
241  }
242 
243 
244  if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
245  else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
246  else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
247 
248  if (fBpModeS == "sequential") fBPMode = kSequential;
249  else if (fBpModeS == "batch") fBPMode = kBatch;
250 
251  // InitializeLearningRates();
252 
253  if (fBPMode == kBatch) {
254  Data()->SetCurrentType(Types::kTraining);
255  Int_t numEvents = Data()->GetNEvents();
256  if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
257  }
258 }
259 
260 ////////////////////////////////////////////////////////////////////////////////
261 /// initialize learning rates of synapses, used only by backpropagation
262 
263 void TMVA::MethodMLP::InitializeLearningRates()
264 {
265  Log() << kDEBUG << "Initialize learning rates" << Endl;
266  TSynapse *synapse;
267  Int_t numSynapses = fSynapses->GetEntriesFast();
268  for (Int_t i = 0; i < numSynapses; i++) {
269  synapse = (TSynapse*)fSynapses->At(i);
270  synapse->SetLearningRate(fLearnRate);
271  }
272 }
273 
274 ////////////////////////////////////////////////////////////////////////////////
275 /// calculate the estimator that training is attempting to minimize
276 
277 Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
278 {
279  // sanity check
280  if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
281  Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
282  }
283 
284  Types::ETreeType saveType = Data()->GetCurrentType();
285  Data()->SetCurrentType(treeType);
286 
287  // if epochs are counted create monitoring histograms (only available for classification)
288  TString type = (treeType == Types::kTraining ? "train" : "test");
289  TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
290  TString nameB = name + "_B";
291  TString nameS = name + "_S";
292  Int_t nbin = 100;
293  Float_t limit = 2;
294  TH1* histS = 0;
295  TH1* histB = 0;
296  if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
297  histS = new TH1F( nameS, nameS, nbin, -limit, limit );
298  histB = new TH1F( nameB, nameB, nbin, -limit, limit );
299  }
300 
301  Double_t estimator = 0;
302 
303  // loop over all training events
304  Int_t nEvents = GetNEvents();
305  UInt_t nClasses = DataInfo().GetNClasses();
306  UInt_t nTgts = DataInfo().GetNTargets();
307 
308 
309  Float_t sumOfWeights = 0.f;
310  if( fWeightRange < 1.f ){
311  fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
312  }
313 
314  for (Int_t i = 0; i < nEvents; i++) {
315 
316  const Event* ev = GetEvent(i);
317 
318  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
319  && (saveType == Types::kTraining)){
320  continue;
321  }
322 
323  Double_t w = ev->GetWeight();
324 
325  ForceNetworkInputs( ev );
326  ForceNetworkCalculations();
327 
328  Double_t d = 0, v = 0;
329  if (DoRegression()) {
330  for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
331  v = GetOutputNeuron( itgt )->GetActivationValue();
332  Double_t targetValue = ev->GetTarget( itgt );
333  Double_t dt = v - targetValue;
334  d += (dt*dt);
335  }
336  estimator += d*w;
337  } else if (DoMulticlass() ) {
338  UInt_t cls = ev->GetClass();
339  if (fEstimator==kCE){
340  Double_t norm(0);
341  for (UInt_t icls = 0; icls < nClasses; icls++) {
342  Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
343  norm += exp( activationValue );
344  if(icls==cls)
345  d = exp( activationValue );
346  }
347  d = -TMath::Log(d/norm);
348  }
349  else{
350  for (UInt_t icls = 0; icls < nClasses; icls++) {
351  Double_t desired = (icls==cls) ? 1.0 : 0.0;
352  v = GetOutputNeuron( icls )->GetActivationValue();
353  d = (desired-v)*(desired-v);
354  }
355  }
356  estimator += d*w; //zjh
357  } else {
358  Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
359  v = GetOutputNeuron()->GetActivationValue();
360  if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
361  else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
362  estimator += d*w; //zjh
363  }
364 
365  if( fDeviationsFromTargets )
366  fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
367 
368  sumOfWeights += w;
369 
370 
371  // fill monitoring histograms
372  if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
373  else if (histB != 0) histB->Fill( float(v), float(w) );
374  }
375 
376 
377  if( fDeviationsFromTargets ) {
378  std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
379 
380  Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
381  estimator = 0.f;
382 
383  Float_t weightRangeCut = fWeightRange*sumOfWeights;
384  Float_t weightSum = 0.f;
385  for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
386  float deviation = (*itDev).first;
387  float devWeight = (*itDev).second;
388  weightSum += devWeight; // add the weight of this event
389  if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
390  estimator += devWeight*deviation;
391  }
392  }
393 
394  sumOfWeights = sumOfWeightsInRange;
395  delete fDeviationsFromTargets;
396  }
397 
398  if (histS != 0) fEpochMonHistS.push_back( histS );
399  if (histB != 0) fEpochMonHistB.push_back( histB );
400 
401  //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
402  //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
403  //else estimator = estimator*0.5/Float_t(nEvents);
404  if (DoRegression()) estimator = estimator/Float_t(sumOfWeights);
405  else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights);
406  else estimator = estimator/Float_t(sumOfWeights);
407 
408 
409  //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
410 
411  Data()->SetCurrentType( saveType );
412 
413  // provide epoch-wise monitoring
414  if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
415  CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
416  }
417 
418  return estimator;
419 }
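// Added summary of the per-event deviation d accumulated above. With network output v,
// desired output y and event weight w:
//   regression      : d = sum over targets of (v_t - y_t)^2
//   multiclass (kCE): d = -log( exp(v_cls) / sum_c exp(v_c) )   (cls = true class)
//   two-class (kMSE): d = (y - v)^2
//   two-class (kCE) : d = -2 * [ y*ln(v) + (1-y)*ln(1-v) ]
// The returned estimator is sum_i(w_i * d_i) / sum_i(w_i); if WeightRange < 1 it is
// restricted to the fraction of the total weight carried by the events with the smallest
// deviations.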
420 
421 ////////////////////////////////////////////////////////////////////////////////
422 
423 void TMVA::MethodMLP::Train(Int_t nEpochs)
424 {
425  if (fNetwork == 0) {
426  //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
427  Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
428  SetAnalysisType(GetAnalysisType());
429  }
430  Log() << kDEBUG << "reinitialize learning rates" << Endl;
431  InitializeLearningRates();
432  PrintMessage("Training Network");
433 
434  Int_t nEvents=GetNEvents();
435  Int_t nSynapses=fSynapses->GetEntriesFast();
436  if (nSynapses>nEvents)
437  Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
438 
439 #ifdef MethodMLP_UseMinuit__
440  if (useMinuit) MinuitMinimize();
441 #else
442  if (fTrainingMethod == kGA) GeneticMinimize();
443  else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
444  else BackPropagationMinimize(nEpochs);
445 #endif
446 
447  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
448  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
449  if (fUseRegulator){
450  Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
451  UpdateRegulators();
452  Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
453  }
454 
455  if( fCalculateErrors || fUseRegulator )
456  {
457  Int_t numSynapses=fSynapses->GetEntriesFast();
458  fInvHessian.ResizeTo(numSynapses,numSynapses);
459  GetApproxInvHessian( fInvHessian ,false);
460  }
461 }
462 
463 ////////////////////////////////////////////////////////////////////////////////
464 /// train network with BFGS algorithm
465 
466 void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
467 {
468  Timer timer( (fSteps>0?100:nEpochs), GetName() );
469 
470  // create histograms for overtraining monitoring
471  Int_t nbinTest = Int_t(nEpochs/fTestRate);
472  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
473  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
474  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
475  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
476 
477  Int_t nSynapses = fSynapses->GetEntriesFast();
478  Int_t nWeights = nSynapses;
479 
480  for (Int_t i=0;i<nSynapses;i++) {
481  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
482  synapse->SetDEDw(0.0);
483  }
484 
485  std::vector<Double_t> buffer( nWeights );
486  for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
487 
488  TMatrixD Dir ( nWeights, 1 );
489  TMatrixD Hessian ( nWeights, nWeights );
490  TMatrixD Gamma ( nWeights, 1 );
491  TMatrixD Delta ( nWeights, 1 );
492  Int_t RegUpdateCD=0; //zjh
493  Int_t RegUpdateTimes=0; //zjh
494  Double_t AccuError=0;
495 
496  Double_t trainE = -1;
497  Double_t testE = -1;
498 
499  fLastAlpha = 0.;
500 
501  if(fSamplingTraining || fSamplingTesting)
502  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
503 
504  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
505  timer.DrawProgressBar( 0 );
506 
507  // start training cycles (epochs)
508  for (Int_t i = 0; i < nEpochs; i++) {
509  if (Float_t(i)/nEpochs < fSamplingEpoch) {
510  if ((i+1)%fTestRate == 0 || (i == 0)) {
511  if (fSamplingTraining) {
512  Data()->SetCurrentType( Types::kTraining );
513  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
514  Data()->CreateSampling();
515  }
516  if (fSamplingTesting) {
517  Data()->SetCurrentType( Types::kTesting );
518  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
519  Data()->CreateSampling();
520  }
521  }
522  }
523  else {
524  Data()->SetCurrentType( Types::kTraining );
525  Data()->InitSampling(1.0,1.0);
526  Data()->SetCurrentType( Types::kTesting );
527  Data()->InitSampling(1.0,1.0);
528  }
529  Data()->SetCurrentType( Types::kTraining );
530 
531  //zjh
532  if (fUseRegulator) {
533  UpdatePriors();
534  RegUpdateCD++;
535  }
536  //zjh
537 
538  SetGammaDelta( Gamma, Delta, buffer );
539 
540  if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
541  SteepestDir( Dir );
542  Hessian.UnitMatrix();
543  RegUpdateCD=0; //zjh
544  }
545  else {
546  if (GetHessian( Hessian, Gamma, Delta )) {
547  SteepestDir( Dir );
548  Hessian.UnitMatrix();
549  RegUpdateCD=0; //zjh
550  }
551  else SetDir( Hessian, Dir );
552  }
553 
554  Double_t dError=0; //zjh
555  if (DerivDir( Dir ) > 0) {
556  SteepestDir( Dir );
557  Hessian.UnitMatrix();
558  RegUpdateCD=0; //zjh
559  }
560  if (LineSearch( Dir, buffer, &dError )) { //zjh
561  Hessian.UnitMatrix();
562  SteepestDir( Dir );
563  RegUpdateCD=0; //zjh
564  if (LineSearch(Dir, buffer, &dError)) { //zjh
565  i = nEpochs;
566  Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
567  }
568  }
569 
570  //zjh+
571  if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
572  AccuError+=dError;
573 
574  if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
575  Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
576  UpdateRegulators();
577  Hessian.UnitMatrix();
578  RegUpdateCD=0;
579  RegUpdateTimes++;
580  AccuError=0;
581  }
582  //zjh-
583 
584  // monitor convergence of training and control sample
585  if ((i+1)%fTestRate == 0) {
586  //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
587  //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
588  trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
589  testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
590  fEstimatorHistTrain->Fill( i+1, trainE );
591  fEstimatorHistTest ->Fill( i+1, testE );
592 
593  Bool_t success = kFALSE;
594  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
595  success = kTRUE;
596  }
597  Data()->EventResult( success );
598 
599  SetCurrentValue( testE );
600  if (HasConverged()) {
601  if (Float_t(i)/nEpochs < fSamplingEpoch) {
602  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
603  i = newEpoch;
604  ResetConvergenceCounter();
605  }
606  else break;
607  }
608  }
609 
610  // draw progress
611  TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
612  if (fSteps > 0) {
613  Float_t progress = 0;
614  if (Float_t(i)/nEpochs < fSamplingEpoch)
615 // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
616  progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
617  else
618  {
619 // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
620  progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
621  }
622  Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
623  if (progress2>progress) progress=progress2; //zjh
624  timer.DrawProgressBar( Int_t(progress), convText );
625  }
626  else {
627  Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
628  if (progress<i) progress=i; //zjh
629  timer.DrawProgressBar( progress, convText ); //zjh
630  }
631 
632  // some verbose output
633  if (fgPRINT_SEQ) {
634  PrintNetwork();
635  WaitForKeyboard();
636  }
637  }
638 }
639 
640 ////////////////////////////////////////////////////////////////////////////////
641 
642 void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
643 {
644  Int_t nWeights = fSynapses->GetEntriesFast();
645 
646  Int_t IDX = 0;
647  Int_t nSynapses = fSynapses->GetEntriesFast();
648  for (Int_t i=0;i<nSynapses;i++) {
649  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
650  Gamma[IDX++][0] = -synapse->GetDEDw();
651  }
652 
653  for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
654 
655  ComputeDEDw();
656 
657  IDX = 0;
658  for (Int_t i=0;i<nSynapses;i++)
659  {
660  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
661  Gamma[IDX++][0] += synapse->GetDEDw();
662  }
663 }
664 
665 ////////////////////////////////////////////////////////////////////////////////
666 
667 void TMVA::MethodMLP::ComputeDEDw()
668 {
669  Int_t nSynapses = fSynapses->GetEntriesFast();
670  for (Int_t i=0;i<nSynapses;i++) {
671  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
672  synapse->SetDEDw( 0.0 );
673  }
674 
675  Int_t nEvents = GetNEvents();
676  Int_t nPosEvents = nEvents;
677  for (Int_t i=0;i<nEvents;i++) {
678 
679  const Event* ev = GetEvent(i);
680  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
681  && (Data()->GetCurrentType() == Types::kTraining)){
682  --nPosEvents;
683  continue;
684  }
685 
686  SimulateEvent( ev );
687 
688  for (Int_t j=0;j<nSynapses;j++) {
689  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
690  synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
691  }
692  }
693 
694  for (Int_t i=0;i<nSynapses;i++) {
695  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
696  Double_t DEDw=synapse->GetDEDw(); //zjh
697  if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
698  synapse->SetDEDw( DEDw / nPosEvents ); //zjh
699  }
700 }
701 
702 ////////////////////////////////////////////////////////////////////////////////
703 
704 void TMVA::MethodMLP::SimulateEvent( const Event* ev )
705 {
706  Double_t eventWeight = ev->GetWeight();
707 
708  ForceNetworkInputs( ev );
709  ForceNetworkCalculations();
710 
711  if (DoRegression()) {
712  UInt_t ntgt = DataInfo().GetNTargets();
713  for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
714  Double_t desired = ev->GetTarget(itgt);
715  Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
716  GetOutputNeuron( itgt )->SetError(error);
717  }
718  } else if (DoMulticlass()) {
719  UInt_t nClasses = DataInfo().GetNClasses();
720  UInt_t cls = ev->GetClass();
721  for (UInt_t icls = 0; icls < nClasses; icls++) {
722  Double_t desired = ( cls==icls ? 1.0 : 0.0 );
723  Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
724  GetOutputNeuron( icls )->SetError(error);
725  }
726  } else {
727  Double_t desired = GetDesiredOutput( ev );
728  Double_t error=-1; //zjh
729  if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
730  else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
731  GetOutputNeuron()->SetError(error);
732  }
733 
734  CalculateNeuronDeltas();
735  for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
736  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
737  synapse->InitDelta();
738  synapse->CalculateDelta();
739  }
740 }
741 
742 ////////////////////////////////////////////////////////////////////////////////
743 
744 void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
745 {
746  Int_t IDX = 0;
747  Int_t nSynapses = fSynapses->GetEntriesFast();
748 
749  for (Int_t i=0;i<nSynapses;i++) {
750  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
751  Dir[IDX++][0] = -synapse->GetDEDw();
752  }
753 }
754 
755 ////////////////////////////////////////////////////////////////////////////////
756 
757 Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
758 {
759  TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
760  if ((Double_t) gd[0][0] == 0.) return kTRUE;
761  TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
762  TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
763  TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
764  Double_t a = 1 / (Double_t) gd[0][0];
765  Double_t f = 1 + ((Double_t)gHg[0][0]*a);
766  TMatrixD res( TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed, Delta)) );
767  res *= f;
768  res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
769  TMatrixD(TMatrixD::kTransposed, Delta)));
770  res *= a;
771  Hessian += res;
772 
773  return kFALSE;
774 }
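// Added note: writing d for Delta (the last weight change) and g for Gamma (the change of
// the error gradient), the update above is the BFGS approximation of the inverse Hessian,
//   H <- H + [ (1 + g^T H g / d^T g) * d d^T  -  d (g^T H)  -  (H g) d^T ] / (d^T g).
// The method returns kTRUE when d^T g vanishes, in which case the caller restarts from the
// steepest-descent direction with a unit matrix.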
775 
776 ////////////////////////////////////////////////////////////////////////////////
777 
778 void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
779 {
780  Int_t IDX = 0;
781  Int_t nSynapses = fSynapses->GetEntriesFast();
782  TMatrixD DEDw(nSynapses, 1);
783 
784  for (Int_t i=0;i<nSynapses;i++) {
785  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
786  DEDw[IDX++][0] = synapse->GetDEDw();
787  }
788 
789  dir = Hessian * DEDw;
790  for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
791 }
792 
793 ////////////////////////////////////////////////////////////////////////////////
794 
795 Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
796 {
797  Int_t IDX = 0;
798  Int_t nSynapses = fSynapses->GetEntriesFast();
799  Double_t Result = 0.0;
800 
801  for (Int_t i=0;i<nSynapses;i++) {
802  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
803  Result += Dir[IDX++][0] * synapse->GetDEDw();
804  }
805  return Result;
806 }
807 
808 ////////////////////////////////////////////////////////////////////////////////
809 
810 Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
811 {
812  Int_t IDX = 0;
813  Int_t nSynapses = fSynapses->GetEntriesFast();
814  Int_t nWeights = nSynapses;
815 
816  std::vector<Double_t> Origin(nWeights);
817  for (Int_t i=0;i<nSynapses;i++) {
818  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
819  Origin[i] = synapse->GetWeight();
820  }
821 
822  Double_t err1 = GetError();
823  Double_t errOrigin=err1; //zjh
824  Double_t alpha1 = 0.;
825  Double_t alpha2 = fLastAlpha;
826 
827 
828  if (alpha2 < 0.01) alpha2 = 0.01;
829  else if (alpha2 > 2.0) alpha2 = 2.0;
830  Double_t alpha_original = alpha2;
831  Double_t alpha3 = alpha2;
832 
833  SetDirWeights( Origin, Dir, alpha2 );
834  Double_t err2 = GetError();
835  //Double_t err2 = err1;
836  Double_t err3 = err2;
837  Bool_t bingo = kFALSE;
838 
839 
840  if (err1 > err2) {
841  for (Int_t i=0;i<100;i++) {
842  alpha3 *= fTau;
843  SetDirWeights(Origin, Dir, alpha3);
844  err3 = GetError();
845  if (err3 > err2) {
846  bingo = kTRUE;
847  break;
848  }
849  alpha1 = alpha2;
850  err1 = err2;
851  alpha2 = alpha3;
852  err2 = err3;
853  }
854  if (!bingo) {
855  SetDirWeights(Origin, Dir, 0.);
856  return kTRUE;
857  }
858  }
859  else {
860  for (Int_t i=0;i<100;i++) {
861  alpha2 /= fTau;
862  if (i==50) {
863  Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
864  alpha2 = -alpha_original;
865  }
866  SetDirWeights(Origin, Dir, alpha2);
867  err2 = GetError();
868  if (err1 > err2) {
869  bingo = kTRUE;
870  break;
871  }
872  alpha3 = alpha2;
873  err3 = err2;
874  }
875  if (!bingo) {
876  SetDirWeights(Origin, Dir, 0.);
877  Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
878  fLastAlpha = 0.05;
879  return kTRUE;
880  }
881  }
882 
883  if (alpha1>0 && alpha2>0 && alpha3 > 0) {
884  fLastAlpha = 0.5 * (alpha1 + alpha3 -
885  (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
886  - ( err2 - err1 ) / (alpha2 - alpha1 )));
887  }
888  else {
889  fLastAlpha = alpha2;
890  }
891 
892  fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
893 
894  SetDirWeights(Origin, Dir, fLastAlpha);
895 
896  // leaving these lines uncommented is a heavy price to pay for only a warning message
897  // (which shouldn't appear anyway)
898  // --> about 15% of time is spent in the final GetError().
899  //
900  Double_t finalError = GetError();
901  if (finalError > err1) {
902  Log() << kWARNING << "Line search increased error! Something is wrong."
903  << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
904  << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
905  }
906 
907  for (Int_t i=0;i<nSynapses;i++) {
908  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
909  buffer[IDX] = synapse->GetWeight() - Origin[IDX];
910  IDX++;
911  }
912 
913  if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
914 
915  return kFALSE;
916 }
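// Added note: once the minimum along Dir is bracketed by (alpha1, alpha2, alpha3) with
// errors (err1, err2, err3), the step size chosen above is the minimum of the parabola
// through the three points,
//   fLastAlpha = 0.5 * [ alpha1 + alpha3 - (err3 - err1) /
//                        ( (err3 - err2)/(alpha3 - alpha2) - (err2 - err1)/(alpha2 - alpha1) ) ],
// capped at 10000; if any of the alphas is non-positive, alpha2 is used instead.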
917 
918 ////////////////////////////////////////////////////////////////////////////////
919 
920 void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
921 {
922  Int_t IDX = 0;
923  Int_t nSynapses = fSynapses->GetEntriesFast();
924 
925  for (Int_t i=0;i<nSynapses;i++) {
926  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
927  synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
928  IDX++;
929  }
930  if (fUseRegulator) UpdatePriors(); //zjh
931 }
932 
933 
934 ////////////////////////////////////////////////////////////////////////////////
935 
936 Double_t TMVA::MethodMLP::GetError()
937 {
938  Int_t nEvents = GetNEvents();
939  UInt_t ntgts = GetNTargets();
940  Double_t Result = 0.;
941 
942  for (Int_t i=0;i<nEvents;i++) {
943  const Event* ev = GetEvent(i);
944 
945  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
946  && (Data()->GetCurrentType() == Types::kTraining)){
947  continue;
948  }
949  SimulateEvent( ev );
950 
951  Double_t error = 0.;
952  if (DoRegression()) {
953  for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
954  error += GetMSEErr( ev, itgt ); //zjh
955  }
956  } else if ( DoMulticlass() ){
957  for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
958  error += GetMSEErr( ev, icls );
959  }
960  } else {
961  if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
962  else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
963  }
964  Result += error * ev->GetWeight();
965  }
966  if (fUseRegulator) Result+=fPrior; //zjh
967  if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
968  return Result;
969 }
970 
971 ////////////////////////////////////////////////////////////////////////////////
972 
973 Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
974 {
975  Double_t error = 0;
976  Double_t output = GetOutputNeuron( index )->GetActivationValue();
977  Double_t target = 0;
978  if (DoRegression()) target = ev->GetTarget( index );
979  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
980  else target = GetDesiredOutput( ev );
981 
982  error = 0.5*(output-target)*(output-target); //zjh
983 
984  return error;
985 
986 }
987 
988 ////////////////////////////////////////////////////////////////////////////////
989 
990 Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
991 {
992  Double_t error = 0;
993  Double_t output = GetOutputNeuron( index )->GetActivationValue();
994  Double_t target = 0;
995  if (DoRegression()) target = ev->GetTarget( index );
996  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
997  else target = GetDesiredOutput( ev );
998 
999  error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1000 
1001  return error;
1002 }
1003 
1004 ////////////////////////////////////////////////////////////////////////////////
1005 /// minimize estimator / train network with backpropagation algorithm
1006 
1007 void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
1008 {
1009  // Timer timer( nEpochs, GetName() );
1010  Timer timer( (fSteps>0?100:nEpochs), GetName() );
1011  Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1012 
1013  // create histograms for overtraining monitoring
1014  Int_t nbinTest = Int_t(nEpochs/fTestRate);
1015  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1016  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1017  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1018  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1019 
1020  if(fSamplingTraining || fSamplingTesting)
1021  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1022 
1023  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1024  timer.DrawProgressBar(0);
1025 
1026  // estimators
1027  Double_t trainE = -1;
1028  Double_t testE = -1;
1029 
1030  // start training cycles (epochs)
1031  for (Int_t i = 0; i < nEpochs; i++) {
1032 
1033  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1034  if ((i+1)%fTestRate == 0 || (i == 0)) {
1035  if (fSamplingTraining) {
1036  Data()->SetCurrentType( Types::kTraining );
1037  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1038  Data()->CreateSampling();
1039  }
1040  if (fSamplingTesting) {
1041  Data()->SetCurrentType( Types::kTesting );
1042  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1043  Data()->CreateSampling();
1044  }
1045  }
1046  }
1047  else {
1048  Data()->SetCurrentType( Types::kTraining );
1049  Data()->InitSampling(1.0,1.0);
1050  Data()->SetCurrentType( Types::kTesting );
1051  Data()->InitSampling(1.0,1.0);
1052  }
1053  Data()->SetCurrentType( Types::kTraining );
1054 
1055  TrainOneEpoch();
1056  DecaySynapseWeights(i >= lateEpoch);
1057 
1058  // monitor convergence of training and control sample
1059  if ((i+1)%fTestRate == 0) {
1060  trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1061  testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1062  fEstimatorHistTrain->Fill( i+1, trainE );
1063  fEstimatorHistTest ->Fill( i+1, testE );
1064 
1065  Bool_t success = kFALSE;
1066  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1067  success = kTRUE;
1068  }
1069  Data()->EventResult( success );
1070 
1071  SetCurrentValue( testE );
1072  if (HasConverged()) {
1073  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1074  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1075  i = newEpoch;
1076  ResetConvergenceCounter();
1077  }
1078  else {
1079  if (lateEpoch > i) lateEpoch = i;
1080  else break;
1081  }
1082  }
1083  }
1084 
1085  // draw progress bar (add convergence value)
1086  TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1087  if (fSteps > 0) {
1088  Float_t progress = 0;
1089  if (Float_t(i)/nEpochs < fSamplingEpoch)
1090  progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1091  else
1092  progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1093 
1094  timer.DrawProgressBar( Int_t(progress), convText );
1095  }
1096  else {
1097  timer.DrawProgressBar( i, convText );
1098  }
1099  }
1100 }
1101 
1102 ////////////////////////////////////////////////////////////////////////////////
1103 /// train network over a single epoch/cycle of events
1104 
1105 void TMVA::MethodMLP::TrainOneEpoch()
1106 {
1107  Int_t nEvents = Data()->GetNEvents();
1108 
1109  // randomize the order events will be presented, important for sequential mode
1110  Int_t* index = new Int_t[nEvents];
1111  for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1112  Shuffle(index, nEvents);
1113 
1114  // loop over all training events
1115  for (Int_t i = 0; i < nEvents; i++) {
1116 
1117  const Event * ev = GetEvent(index[i]);
1118  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1119  && (Data()->GetCurrentType() == Types::kTraining)){
1120  continue;
1121  }
1122 
1123  TrainOneEvent(index[i]);
1124 
1125  // do adjustments if in batch mode
1126  if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1127  AdjustSynapseWeights();
1128  if (fgPRINT_BATCH) {
1129  PrintNetwork();
1130  WaitForKeyboard();
1131  }
1132  }
1133 
1134  // debug in sequential mode
1135  if (fgPRINT_SEQ) {
1136  PrintNetwork();
1137  WaitForKeyboard();
1138  }
1139  }
1140 
1141  delete[] index;
1142 }
1143 
1144 ////////////////////////////////////////////////////////////////////////////////
1145 /// Input:
1146 /// index: the array to shuffle
1147 /// n: the size of the array
1148 /// Output:
1149 /// index: the shuffled indexes
1150 /// This method is used for sequential training
1151 
1152 void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
1153 {
1154  Int_t j, k;
1155  Int_t a = n - 1;
1156  for (Int_t i = 0; i < n; i++) {
1157  j = (Int_t) (frgen->Rndm() * a);
1158  if (j<n){ // address the 'worries' of coverity
1159  k = index[j];
1160  index[j] = index[i];
1161  index[i] = k;
1162  }
1163  }
1164 }
1165 
1166 ////////////////////////////////////////////////////////////////////////////////
1167 /// decay synapse weights
1168 /// in last 10 epochs, lower learning rate even more to find a good minimum
1169 
1170 void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
1171 {
1172  TSynapse* synapse;
1173  Int_t numSynapses = fSynapses->GetEntriesFast();
1174  for (Int_t i = 0; i < numSynapses; i++) {
1175  synapse = (TSynapse*)fSynapses->At(i);
1176  if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1177  else synapse->DecayLearningRate(fDecayRate);
1178  }
1179 }
1180 
1181 ////////////////////////////////////////////////////////////////////////////////
1182 /// fast per-event training
1183 
1184 void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
1185 {
1186  GetEvent(ievt);
1187 
1188  // as soon as we know how to get event weights, get that here
1189 
1190  // note: the normalization of event weights will affect the choice
1191  // of learning rate, one will have to experiment to get the right value.
1192  // in general, if the "average" event weight is 1, the learning rate
1193  // should be good if set around 0.02 (a good value if all event weights are 1)
1194  Double_t eventWeight = 1.0;
1195 
1196  // get the desired output of this event
1197  Double_t desired;
1198  if (type == 0) desired = fOutput->GetMin(); // background //zjh
1199  else desired = fOutput->GetMax(); // signal //zjh
1200 
1201  // force the value for each input neuron
1202  Double_t x;
1203  TNeuron* neuron;
1204 
1205  for (UInt_t j = 0; j < GetNvar(); j++) {
1206  x = branchVar[j];
1207  if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1208  neuron = GetInputNeuron(j);
1209  neuron->ForceValue(x);
1210  }
1211 
1212  ForceNetworkCalculations();
1213  UpdateNetwork(desired, eventWeight);
1214 }
1215 
1216 ////////////////////////////////////////////////////////////////////////////////
1217 /// train network over a single event
1218 /// this uses the new event model
1219 
1220 void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
1221 {
1222  // note: the normalization of event weights will affect the choice
1223  // of learning rate, one will have to experiment to get the right value.
1224  // in general, if the "average" event weight is 1, the learning rate
1225  // should be good if set around 0.02 (a good value if all event weights are 1)
1226 
1227  const Event * ev = GetEvent(ievt);
1228  Double_t eventWeight = ev->GetWeight();
1229  ForceNetworkInputs( ev );
1230  ForceNetworkCalculations();
1231  if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1232  if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1233  else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1234 }
1235 
1236 ////////////////////////////////////////////////////////////////////////////////
1237 /// get the desired output of this event
1238 
1239 Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
1240 {
1241  return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1242 }
1243 
1244 
1245 ////////////////////////////////////////////////////////////////////////////////
1246 /// update the network based on how closely
1247 /// the output matched the desired output
1248 
1249 void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
1250 {
1251  Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1252  if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1253  else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1254  else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1255  error *= eventWeight;
1256  GetOutputNeuron()->SetError(error);
1257  CalculateNeuronDeltas();
1258  UpdateSynapses();
1259 }
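// Added note: the error assigned to the output neuron above is the derivative of the
// per-event estimator with respect to the network output v (scaled by the event weight).
// For kMSE, d/dv [ 0.5*(v - y)^2 ] = v - y. For kCE with y in {0,1},
// d/dv [ -( y*ln(v) + (1-y)*ln(1-v) ) ] = -y/v + (1-y)/(1-v), which equals the
// -1/(v - 1 + y) used above (-1/v for y = 1, +1/(1-v) for y = 0).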
1260 
1261 ////////////////////////////////////////////////////////////////////////////////
1262 /// update the network based on how closely
1263 /// the output matched the desired output
1264 
1265 void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1266 {
1267  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1268  Double_t error = GetOutputNeuron( i )->GetActivationValue() - desired.at(i);
1269  error *= eventWeight;
1270  GetOutputNeuron( i )->SetError(error);
1271  }
1272  CalculateNeuronDeltas();
1273  UpdateSynapses();
1274 }
1275 
1276 
1277 ////////////////////////////////////////////////////////////////////////////////
1278 /// have each neuron calculate its delta by backpropagation
1279 
1280 void TMVA::MethodMLP::CalculateNeuronDeltas()
1281 {
1282  TNeuron* neuron;
1283  Int_t numNeurons;
1284  Int_t numLayers = fNetwork->GetEntriesFast();
1285  TObjArray* curLayer;
1286 
1287  // step backwards through the network (backpropagation)
1288  // deltas calculated starting at output layer
1289  for (Int_t i = numLayers-1; i >= 0; i--) {
1290  curLayer = (TObjArray*)fNetwork->At(i);
1291  numNeurons = curLayer->GetEntriesFast();
1292 
1293  for (Int_t j = 0; j < numNeurons; j++) {
1294  neuron = (TNeuron*) curLayer->At(j);
1295  neuron->CalculateDelta();
1296  }
1297  }
1298 }
1299 
1300 ////////////////////////////////////////////////////////////////////////////////
1301 /// create genetics class similar to GeneticCut
1302 /// give it vector of parameter ranges (parameters = weights)
1303 /// link fitness function of this class to ComputeEstimator
1304 /// instantiate GA (see MethodCuts)
1305 /// run it
1306 /// then this should exist for GA, Minuit and random sampling
1307 
1308 void TMVA::MethodMLP::GeneticMinimize()
1309 {
1310  PrintMessage("Minimizing Estimator with GA");
1311 
1312  // define GA parameters
1313  fGA_preCalc = 1;
1314  fGA_SC_steps = 10;
1315  fGA_SC_rate = 5;
1316  fGA_SC_factor = 0.95;
1317  fGA_nsteps = 30;
1318 
1319  // ranges
1320  std::vector<Interval*> ranges;
1321 
1322  Int_t numWeights = fSynapses->GetEntriesFast();
1323  for (Int_t ivar=0; ivar< numWeights; ivar++) {
1324  ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1325  }
1326 
1327  FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1328  gf->Run();
1329 
1330  Double_t estimator = CalculateEstimator();
1331  Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1332 }
1333 
1334 ////////////////////////////////////////////////////////////////////////////////
1335 /// interface to the estimate
1336 
1337 Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1338 {
1339  return ComputeEstimator( parameters );
1340 }
1341 
1342 ////////////////////////////////////////////////////////////////////////////////
1343 /// this function is called by GeneticANN for GA optimization
1344 
1345 Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1346 {
1347  TSynapse* synapse;
1348  Int_t numSynapses = fSynapses->GetEntriesFast();
1349 
1350  for (Int_t i = 0; i < numSynapses; i++) {
1351  synapse = (TSynapse*)fSynapses->At(i);
1352  synapse->SetWeight(parameters.at(i));
1353  }
1354  if (fUseRegulator) UpdatePriors(); //zjh
1355 
1356  Double_t estimator = CalculateEstimator();
1357 
1358  return estimator;
1359 }
1360 
1361 ////////////////////////////////////////////////////////////////////////////////
1362 /// update synapse error fields and adjust the weights (if in sequential mode)
1363 
1364 void TMVA::MethodMLP::UpdateSynapses()
1365 {
1366  TNeuron* neuron;
1367  Int_t numNeurons;
1368  TObjArray* curLayer;
1369  Int_t numLayers = fNetwork->GetEntriesFast();
1370 
1371  for (Int_t i = 0; i < numLayers; i++) {
1372  curLayer = (TObjArray*)fNetwork->At(i);
1373  numNeurons = curLayer->GetEntriesFast();
1374 
1375  for (Int_t j = 0; j < numNeurons; j++) {
1376  neuron = (TNeuron*) curLayer->At(j);
1377  if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1378  else neuron->UpdateSynapsesSequential();
1379  }
1380  }
1381 }
1382 
1383 ////////////////////////////////////////////////////////////////////////////////
1384 /// just adjust the synapse weights (should be called in batch mode)
1385 
1386 void TMVA::MethodMLP::AdjustSynapseWeights()
1387 {
1388  TNeuron* neuron;
1389  Int_t numNeurons;
1390  TObjArray* curLayer;
1391  Int_t numLayers = fNetwork->GetEntriesFast();
1392 
1393  for (Int_t i = numLayers-1; i >= 0; i--) {
1394  curLayer = (TObjArray*)fNetwork->At(i);
1395  numNeurons = curLayer->GetEntriesFast();
1396 
1397  for (Int_t j = 0; j < numNeurons; j++) {
1398  neuron = (TNeuron*) curLayer->At(j);
1399  neuron->AdjustSynapseWeights();
1400  }
1401  }
1402 }
1403 
1404 ////////////////////////////////////////////////////////////////////////////////
1405 
1406 void TMVA::MethodMLP::UpdatePriors()
1407 {
1408  fPrior=0;
1409  fPriorDev.clear();
1410  Int_t nSynapses = fSynapses->GetEntriesFast();
1411  for (Int_t i=0;i<nSynapses;i++) {
1412  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1413  fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1414  fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1415  }
1416 }
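// Added note: the regulator implements a Gaussian (weight-decay) prior on the synapse
// weights. With strength fRegulators[c] for weight class c = fRegulatorIdx[i],
//   fPrior       = sum_i 0.5 * fRegulators[c(i)] * w_i^2
//   fPriorDev[i] = d(fPrior)/d(w_i) = fRegulators[c(i)] * w_i ,
// which GetError() adds to the error and ComputeDEDw() adds to each dE/dw when
// UseRegulator is set.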
1417 
1418 ////////////////////////////////////////////////////////////////////////////////
1419 
1420 void TMVA::MethodMLP::UpdateRegulators()
1421 {
1422  TMatrixD InvH(0,0);
1423  GetApproxInvHessian(InvH);
1424  Int_t numSynapses=fSynapses->GetEntriesFast();
1425  Int_t numRegulators=fRegulators.size();
1426  Float_t gamma=0,
1427  variance=1.; // Gaussian noise
1428  std::vector<Int_t> nWDP(numRegulators);
1429  std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1430  for (int i=0;i<numSynapses;i++) {
1431  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1432  Int_t idx=fRegulatorIdx[i];
1433  nWDP[idx]++;
1434  trace[idx]+=InvH[i][i];
1435  gamma+=1-fRegulators[idx]*InvH[i][i];
1436  weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1437  }
1438  if (fEstimator==kMSE) {
1439  if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1440  else variance=CalculateEstimator( Types::kTraining, 0 );
1441  }
1442 
1443  //Log() << kDEBUG << Endl;
1444  for (int i=0;i<numRegulators;i++)
1445  {
1446  //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1447  fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1448  if (fRegulators[i]<0) fRegulators[i]=0;
1449  Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1450  }
1451  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1452  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1453 
1454  Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1455 
1456 }
1457 
1458 ////////////////////////////////////////////////////////////////////////////////
1459 
1460 void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1461 {
1462  Int_t numSynapses=fSynapses->GetEntriesFast();
1463  InvHessian.ResizeTo( numSynapses, numSynapses );
1464  InvHessian=0;
1465  TMatrixD sens(numSynapses,1);
1466  TMatrixD sensT(1,numSynapses);
1467  Int_t nEvents = GetNEvents();
1468  for (Int_t i=0;i<nEvents;i++) {
1469  GetEvent(i);
1470  double outputValue=GetMvaValue(); // force calculation
1471  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1472  CalculateNeuronDeltas();
1473  for (Int_t j = 0; j < numSynapses; j++){
1474  TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1475  synapses->InitDelta();
1476  synapses->CalculateDelta();
1477  sens[j][0]=sensT[0][j]=synapses->GetDelta();
1478  }
1479  if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1480  else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1481  }
1482 
1483  // TVectorD eValue(numSynapses);
1484  if (regulate) {
1485  for (Int_t i = 0; i < numSynapses; i++){
1486  InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1487  }
1488  }
1489  else {
1490  for (Int_t i = 0; i < numSynapses; i++){
1491  InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1492  }
1493  }
1494 
1495  InvHessian.Invert();
1496 
1497 }
1498 
1499 ////////////////////////////////////////////////////////////////////////////////
1500 
1501 Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
1502 {
1503  Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1504 
1505  // no Hessian (old training file) or no error requested
1506  if (!fCalculateErrors || errLower==0 || errUpper==0)
1507  return MvaValue;
1508 
1509  Double_t MvaUpper,MvaLower,median,variance;
1510  Int_t numSynapses=fSynapses->GetEntriesFast();
1511  if (fInvHessian.GetNcols()!=numSynapses) {
1512  Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1513  }
1514  TMatrixD sens(numSynapses,1);
1515  TMatrixD sensT(1,numSynapses);
1516  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1517  //GetOutputNeuron()->SetError(1.);
1518  CalculateNeuronDeltas();
1519  for (Int_t i = 0; i < numSynapses; i++){
1520  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1521  synapses->InitDelta();
1522  synapses->CalculateDelta();
1523  sensT[0][i]=synapses->GetDelta();
1524  }
1525  sens.Transpose(sensT);
1526  TMatrixD sig=sensT*fInvHessian*sens;
1527  variance=sig[0][0];
1528  median=GetOutputNeuron()->GetValue();
1529 
1530  if (variance<0) {
1531  Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1532  variance=0;
1533  }
1534  variance=sqrt(variance);
1535 
1536  //upper
1537  MvaUpper=fOutput->Eval(median+variance);
1538  if(errUpper)
1539  *errUpper=MvaUpper-MvaValue;
1540 
1541  //lower
1542  MvaLower=fOutput->Eval(median-variance);
1543  if(errLower)
1544  *errLower=MvaValue-MvaLower;
1545 
1546  return MvaValue;
1547 }
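// Added note: the error band computed above is linear error propagation through the
// network. With s_i the synapse deltas collected in sens (sensitivity of the output-neuron
// value to weight w_i) and fInvHessian the approximate inverse Hessian,
//   sigma^2 = s^T * H^-1 * s ,
// and the asymmetric errors follow from pushing median +/- sigma through the output
// activation: *errUpper = f(median + sigma) - MvaValue, *errLower = MvaValue - f(median - sigma).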
1548 
1549 
1550 #ifdef MethodMLP_UseMinuit__
1551 
1552 ////////////////////////////////////////////////////////////////////////////////
1553 /// minimize using Minuit
1554 
1555 void TMVA::MethodMLP::MinuitMinimize()
1556 {
1557  fNumberOfWeights = fSynapses->GetEntriesFast();
1558 
1559  TFitter* tfitter = new TFitter( fNumberOfWeights );
1560 
1561  // minuit-specific settings
1562  Double_t args[10];
1563 
1564  // output level
1565  args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1566  tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1567  tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1568 
1569  double w[54];
1570 
1571  // init parameters
1572  for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1573  TString parName = Form("w%i", ipar);
1574  tfitter->SetParameter( ipar,
1575  parName, w[ipar], 0.1, 0, 0 );
1576  }
1577 
1578  // define the CFN function
1579  tfitter->SetFCN( &IFCN );
1580 
1581  // define fit strategy
1582  args[0] = 2;
1583  tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1584 
1585  // now do the fit !
1586  args[0] = 1e-04;
1587  tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1588 
1589  Bool_t doBetter = kFALSE;
1590  Bool_t doEvenBetter = kFALSE;
1591  if (doBetter) {
1592  args[0] = 1e-04;
1593  tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1594 
1595  if (doEvenBetter) {
1596  args[0] = 500;
1597  tfitter->ExecuteCommand( "MINOS", args, 1 );
1598  }
1599  }
1600 }
1601 
1602 //_____________________________________________________________________________
1603 void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1604 {
1605  // Evaluate the minimisation function ----------------------------------------------------
1606  //
1607  // Input parameters:
1608  // npars: number of currently variable parameters
1609  // CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1610  // fitPars: array of (constant and variable) parameters
1611  // iflag: indicates what is to be calculated (see example below)
1612  // grad: array of gradients
1613  //
1614  // Output parameters:
1615  // f: the calculated function value.
1616  // grad: the (optional) vector of first derivatives).
1617  // ---------------------------------------------------------------------------------------
1618  ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1619 }
1620 
1621 TTHREAD_TLS(Int_t) nc = 0;
1622 TTHREAD_TLS(double) minf = 1000000;
1623 
1624 void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1625 {
1626  // first update the weights
1627  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1628  TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1629  synapse->SetWeight(fitPars[ipar]);
1630  }
1631 
1632  // now compute the estimator
1633  f = CalculateEstimator();
1634 
1635  nc++;
1636  if (f < minf) minf = f;
1637  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1638  Log() << kDEBUG << Endl;
1639  Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1640 }
1641 
1642 ////////////////////////////////////////////////////////////////////////////////
1643 /// global "this" pointer to be used in minuit
1644 
1645 TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1646 {
1647  return fgThis;
1648 }
1649 
1650 #endif
1651 
1652 
1653 ////////////////////////////////////////////////////////////////////////////////
1654 /// write specific classifier response
1655 
1656 void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1657 {
1658  MethodANNBase::MakeClassSpecific(fout, className);
1659 }
1660 
1661 ////////////////////////////////////////////////////////////////////////////////
1662 /// get help message text
1663 ///
1664 /// typical length of text line:
1665 /// "|--------------------------------------------------------------|"
1666 
1667 void TMVA::MethodMLP::GetHelpMessage() const
1668 {
1669  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1670  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1671 
1672  Log() << Endl;
1673  Log() << col << "--- Short description:" << colres << Endl;
1674  Log() << Endl;
1675  Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1676  Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1677  Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1678  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1679  Log() << "signal and one background). " << Endl;
1680  Log() << Endl;
1681  Log() << col << "--- Performance optimisation:" << colres << Endl;
1682  Log() << Endl;
1683  Log() << "Neural networks are stable and perform well for a large variety of " << Endl;
1684  Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1685  Log() << "to (e.g.) boosted decision trees, the user is advised to remove input " << Endl;
1686  Log() << "variables that have little discrimination power. " << Endl;
1687  Log() << "" << Endl;
1688  Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1689  Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1690  Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1691  Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1692  Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1693  Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1694  Log() << "competitive results)." << Endl;
1695  Log() << Endl;
1696  Log() << col << "Overtraining: " << colres
1697  << "only the TMlpANN performs an explicit separation of the" << Endl;
1698  Log() << "full training sample into independent training and validation samples." << Endl;
1699  Log() << "We have found that in most high-energy physics applications the " << Endl;
1700  Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1701  Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1702  Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1703  Log() << "the use of validation samples would only reduce the available training" << Endl;
1704  Log() << "information. However, if the performance on the training sample is " << Endl;
1705  Log() << "found to be significantly better than the one found with the inde-" << Endl;
1706  Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1707  Log() << "are printed to standard output at the end of each training job." << Endl;
1708  Log() << Endl;
1709  Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1710  Log() << Endl;
1711  Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1712  Log() << "\"HiddenLayers=N+1,N,...\", where the first hidden layer has N+1" << Endl;
1713  Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1714  Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1715  Log() << "in favour of more neurons in the first hidden layer." << Endl;
1716  Log() << "" << Endl;
1717  Log() << "The number of cycles should be above 500. As noted above, if the number of" << Endl;
1718  Log() << "adjustable weights is small compared to the training sample size," << Endl;
1719  Log() << "using a large number of training cycles should not lead to overtraining." << Endl;
1720 }
1721 
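As a usage illustration of the options discussed in GetHelpMessage() above, the sketch below books the MLP through the standard TMVA Factory workflow. Only HiddenLayers and NCycles are taken from the help text; the helper function, tree and variable names, and the remaining option flags are assumptions that follow the usual TMVA classification tutorial and should be checked against the MethodMLP option reference for this ROOT version.

#include "TFile.h"
#include "TTree.h"
#include "TCut.h"
#include "TMVA/Factory.h"
#include "TMVA/Types.h"

// Hypothetical helper: signalTree and backgroundTree are assumed to contain
// the float branches var1 and var2.
void BookMLPExample(TTree* signalTree, TTree* backgroundTree)
{
   TFile* outputFile = TFile::Open("TMVA_MLP.root", "RECREATE");
   TMVA::Factory factory("TMVAClassification", outputFile,
                         "!V:!Silent:Color:AnalysisType=Classification");

   factory.AddVariable("var1", 'F');
   factory.AddVariable("var2", 'F');
   factory.AddSignalTree(signalTree, 1.0);
   factory.AddBackgroundTree(backgroundTree, 1.0);
   factory.PrepareTrainingAndTestTree(TCut(""), "SplitMode=Random:NormMode=NumEvents:!V");

   // Two hidden layers with N+1 and N neurons (N = number of input variables)
   // and more than 500 training cycles, as recommended in the help message.
   factory.BookMethod(TMVA::Types::kMLP, "MLP",
                      "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:"
                      "HiddenLayers=N+1,N:TestRate=5");

   factory.TrainAllMethods();
   factory.TestAllMethods();
   factory.EvaluateAllMethods();

   outputFile->Close();
}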