ROOT 6.07/09 Reference Guide: MethodMLP.cxx
1 // @(#)root/tmva $Id$
2 // Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodMLP *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * ANN Multilayer Perceptron class for the discrimination of signal *
12  * from background. BFGS implementation based on TMultiLayerPerceptron *
13  * class from ROOT (http://root.cern.ch). *
14  * *
15  * Authors (alphabetical): *
16  * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19  * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20  * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25  * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 //_______________________________________________________________________
39 //
40 // Multilayer Perceptron class built off of MethodANNBase
41 //_______________________________________________________________________
42 
43 #include "TMVA/MethodMLP.h"
44 
45 #include "TMVA/Config.h"
46 #include "TMVA/Configurable.h"
47 #include "TMVA/ConvergenceTest.h"
48 #include "TMVA/ClassifierFactory.h"
49 #include "TMVA/DataSet.h"
50 #include "TMVA/DataSetInfo.h"
51 #include "TMVA/FitterBase.h"
52 #include "TMVA/GeneticFitter.h"
53 #include "TMVA/IFitterTarget.h"
54 #include "TMVA/IMethod.h"
55 #include "TMVA/Interval.h"
56 #include "TMVA/MethodANNBase.h"
57 #include "TMVA/MsgLogger.h"
58 #include "TMVA/TNeuron.h"
59 #include "TMVA/TSynapse.h"
60 #include "TMVA/Timer.h"
61 #include "TMVA/Tools.h"
62 #include "TMVA/Types.h"
63 
64 #include "TH1.h"
65 #include "TString.h"
66 #include "TTree.h"
67 #include "Riostream.h"
68 #include "TFitter.h"
69 #include "TMatrixD.h"
70 #include "TMath.h"
71 #include "TFile.h"
72 
73 #include <cmath>
74 #include <vector>
75 
76 #ifdef MethodMLP_UseMinuit__
77 TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
78 Bool_t MethodMLP_UseMinuit = kTRUE;
79 #endif
80 
81 REGISTER_METHOD(MLP)
82 
83 ClassImp(TMVA::MethodMLP)
84 
85  using std::vector;
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// standard constructor
89 
90 TMVA::MethodMLP::MethodMLP( const TString& jobName,
91  const TString& methodTitle,
92  DataSetInfo& theData,
93  const TString& theOption)
94  : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
95  fUseRegulator(false), fCalculateErrors(false),
96  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
97  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
98  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
99  fSamplingTraining(false), fSamplingTesting(false),
100  fLastAlpha(0.0), fTau(0.),
101  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
102  fBPMode(kSequential), fBpModeS("None"),
103  fBatchSize(0), fTestRate(0), fEpochMon(false),
104  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
105  fGA_SC_rate(0), fGA_SC_factor(0.0),
106  fDeviationsFromTargets(0),
107  fWeightRange (1.0)
108 {
109 
110 }
111 
112 ////////////////////////////////////////////////////////////////////////////////
113 /// constructor from a weight file
114 
115 TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
116  const TString& theWeightFile)
117  : MethodANNBase( Types::kMLP, theData, theWeightFile),
118  fUseRegulator(false), fCalculateErrors(false),
119  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
120  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
121  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
122  fSamplingTraining(false), fSamplingTesting(false),
123  fLastAlpha(0.0), fTau(0.),
124  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
125  fBPMode(kSequential), fBpModeS("None"),
126  fBatchSize(0), fTestRate(0), fEpochMon(false),
127  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
128  fGA_SC_rate(0), fGA_SC_factor(0.0),
129  fDeviationsFromTargets(0),
130  fWeightRange (1.0)
131 {
132 }
133 
134 ////////////////////////////////////////////////////////////////////////////////
135 /// destructor
136 /// nothing to be done
137 
138 TMVA::MethodMLP::~MethodMLP()
139 {
140 }
141 
142 void TMVA::MethodMLP::Train()
143 {
144  Train(NumCycles());
145 }
146 
147 
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 /// MLP can handle classification with 2 classes, multiclass classification, and regression with one regression-target
151 
152 Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
153 {
154  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
155  if (type == Types::kMulticlass ) return kTRUE;
156  if (type == Types::kRegression ) return kTRUE;
157 
158  return kFALSE;
159 }
160 
161 ////////////////////////////////////////////////////////////////////////////////
162 /// default initializations
163 
164 void TMVA::MethodMLP::Init()
165 {
166  // the minimum requirement to declare an event signal-like
167  SetSignalReferenceCut( 0.5 );
168 #ifdef MethodMLP_UseMinuit__
169  fgThis = this;
170 #endif
171 }
172 
173 ////////////////////////////////////////////////////////////////////////////////
174 /// define the options (their key words) that can be set in the option string
175 /// known options:
176 /// TrainingMethod <string> Training method
177 /// available values are: BP Back-Propagation <default>
178 /// GA Genetic Algorithm (takes a LONG time)
179 ///
180 /// LearningRate <float> NN learning rate parameter
181 /// DecayRate <float> Decay rate for learning parameter
182 /// TestRate <int> Test for overtraining performed at each #th epochs
183 ///
184 /// BPMode <string> Back-propagation learning mode
185 /// available values are: sequential <default>
186 /// batch
187 ///
188 /// BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
189 /// -1 for BatchSize=number_of_events
190 
191 void TMVA::MethodMLP::DeclareOptions()
192 {
193  DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
194  "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
195  AddPreDefVal(TString("BP"));
196  AddPreDefVal(TString("GA"));
197  AddPreDefVal(TString("BFGS"));
198 
199  DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
200  DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
201  DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
202  DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
203 
204  DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
205  DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
206  DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance"," The sampling weights of events in epochs which were successful are multiplied with SamplingImportance, else they are divided.");
207 
208  DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
209  DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
210 
211  DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
212  DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
213 
214  DeclareOptionRef(fBpModeS="sequential", "BPMode",
215  "Back-propagation learning mode: sequential or batch");
216  AddPreDefVal(TString("sequential"));
217  AddPreDefVal(TString("batch"));
218 
219  DeclareOptionRef(fBatchSize=-1, "BatchSize",
220  "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
221 
222  DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
223  "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
224 
225  DeclareOptionRef(fSteps=-1, "ConvergenceTests",
226  "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
227 
228  DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
229  "Use regulator to avoid over-training"); //zjh
230  DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
231  "Maximum times of regulator update"); //zjh
232  DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
233  "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
234 
235  DeclareOptionRef(fWeightRange=1.0, "WeightRange",
236  "Use for the estimator calculation only the events with the smallest deviations from the desired value, up to the given fraction of the total event weight");
237 
238 }
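////////////////////////////////////////////////////////////////////////////////
/// Example: the options declared above are set through the booking option
/// string, together with common options from MethodANNBase/MethodBase. A
/// minimal sketch, assuming an existing `TMVA::Factory* factory` and
/// `TMVA::DataLoader* dataloader` (newer Factory interface; older releases book
/// directly on the Factory); the option values are illustrative only:
///
///     factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP",
///                          "!H:!V:NCycles=600:HiddenLayers=N+5:NeuronType=tanh:"
///                          "TrainingMethod=BFGS:LearningRate=0.02:DecayRate=0.01:"
///                          "TestRate=5:UseRegulator" );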
239 
240 
241 ////////////////////////////////////////////////////////////////////////////////
242 /// process user options
243 
244 void TMVA::MethodMLP::ProcessOptions()
245 {
246  MethodANNBase::ProcessOptions();
247 
248 
249  if (IgnoreEventsWithNegWeightsInTraining()) {
250  Log() << kINFO
251  << "Will ignore negative events in training!"
252  << Endl;
253  }
254 
255 
256  if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
257  else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
258  else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
259 
260  if (fBpModeS == "sequential") fBPMode = kSequential;
261  else if (fBpModeS == "batch") fBPMode = kBatch;
262 
263  // InitializeLearningRates();
264 
265  if (fBPMode == kBatch) {
267  Int_t numEvents = Data()->GetNEvents();
268  if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
269  }
270 }
271 
272 ////////////////////////////////////////////////////////////////////////////////
273 /// initialize learning rates of synapses, used only by backpropagation
274 
275 void TMVA::MethodMLP::InitializeLearningRates()
276 {
277  Log() << kDEBUG << "Initialize learning rates" << Endl;
278  TSynapse *synapse;
279  Int_t numSynapses = fSynapses->GetEntriesFast();
280  for (Int_t i = 0; i < numSynapses; i++) {
281  synapse = (TSynapse*)fSynapses->At(i);
282  synapse->SetLearningRate(fLearnRate);
283  }
284 }
285 
286 ////////////////////////////////////////////////////////////////////////////////
287 /// calculate the estimator that training is attempting to minimize
288 
289 Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
290 {
291  // sanity check
292  if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
293  Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
294  }
295 
296  Types::ETreeType saveType = Data()->GetCurrentType();
297  Data()->SetCurrentType(treeType);
298 
299  // if epochs are counted create monitoring histograms (only available for classification)
300  TString type = (treeType == Types::kTraining ? "train" : "test");
301  TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
302  TString nameB = name + "_B";
303  TString nameS = name + "_S";
304  Int_t nbin = 100;
305  Float_t limit = 2;
306  TH1* histS = 0;
307  TH1* histB = 0;
308  if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
309  histS = new TH1F( nameS, nameS, nbin, -limit, limit );
310  histB = new TH1F( nameB, nameB, nbin, -limit, limit );
311  }
312 
313  Double_t estimator = 0;
314 
315  // loop over all training events
316  Int_t nEvents = GetNEvents();
317  UInt_t nClasses = DataInfo().GetNClasses();
318  UInt_t nTgts = DataInfo().GetNTargets();
319 
320 
321  Float_t sumOfWeights = 0.f;
322  if( fWeightRange < 1.f ){
323  fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
324  }
325 
326  for (Int_t i = 0; i < nEvents; i++) {
327 
328  const Event* ev = GetEvent(i);
329 
330  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
331  && (saveType == Types::kTraining)){
332  continue;
333  }
334 
335  Double_t w = ev->GetWeight();
336 
337  ForceNetworkInputs( ev );
338  ForceNetworkCalculations();
339 
340  Double_t d = 0, v = 0;
341  if (DoRegression()) {
342  for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
343  v = GetOutputNeuron( itgt )->GetActivationValue();
344  Double_t targetValue = ev->GetTarget( itgt );
345  Double_t dt = v - targetValue;
346  d += (dt*dt);
347  }
348  estimator += d*w;
349  } else if (DoMulticlass() ) {
350  UInt_t cls = ev->GetClass();
351  if (fEstimator==kCE){
352  Double_t norm(0);
353  for (UInt_t icls = 0; icls < nClasses; icls++) {
354  Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
355  norm += exp( activationValue );
356  if(icls==cls)
357  d = exp( activationValue );
358  }
359  d = -TMath::Log(d/norm);
360  }
361  else{
362  for (UInt_t icls = 0; icls < nClasses; icls++) {
363  Double_t desired = (icls==cls) ? 1.0 : 0.0;
364  v = GetOutputNeuron( icls )->GetActivationValue();
365  d = (desired-v)*(desired-v);
366  }
367  }
368  estimator += d*w; //zjh
369  } else {
370  Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
371  v = GetOutputNeuron()->GetActivationValue();
372  if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
373  else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
374  estimator += d*w; //zjh
375  }
376 
377  if (fDeviationsFromTargets)
378  fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
379 
380  sumOfWeights += w;
381 
382 
383  // fill monitoring histograms
384  if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
385  else if (histB != 0) histB->Fill( float(v), float(w) );
386  }
387 
388 
389  if( fDeviationsFromTargets ) {
390  std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
391 
392  Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
393  estimator = 0.f;
394 
395  Float_t weightRangeCut = fWeightRange*sumOfWeights;
396  Float_t weightSum = 0.f;
397  for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
398  float deviation = (*itDev).first;
399  float devWeight = (*itDev).second;
400  weightSum += devWeight; // add the weight of this event
401  if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
402  estimator += devWeight*deviation;
403  }
404  }
405 
406  sumOfWeights = sumOfWeightsInRange;
407  delete fDeviationsFromTargets;
408  }
409 
410  if (histS != 0) fEpochMonHistS.push_back( histS );
411  if (histB != 0) fEpochMonHistB.push_back( histB );
412 
413  //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
414  //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
415  //else estimator = estimator*0.5/Float_t(nEvents);
416  if (DoRegression()) estimator = estimator/Float_t(sumOfWeights);
417  else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights);
418  else estimator = estimator/Float_t(sumOfWeights);
419 
420 
421  //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
422 
423  Data()->SetCurrentType( saveType );
424 
425  // provide epoch-wise monitoring
426  if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
427  CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
428  }
429 
430  return estimator;
431 }
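////////////////////////////////////////////////////////////////////////////////
/// In formulas, the per-event deviation d accumulated above is (v = network
/// output, y = desired output or target, a_k = output activations, c = true
/// class, w = event weight):
///
/// \f[
///    d_\mathrm{MSE} = (y-v)^2 , \qquad
///    d_\mathrm{CE}  = -2\,\bigl[\, y\ln v + (1-y)\ln(1-v) \,\bigr] , \qquad
///    d_\mathrm{multiclass,CE} = -\ln\frac{e^{a_c}}{\sum_k e^{a_k}} ,
/// \f]
///
/// with d for regression being the squared deviation summed over all targets.
/// The returned estimator is the weighted mean
/// \f$ E = \sum_i w_i d_i \big/ \sum_i w_i \f$, restricted to the
/// smallest-deviation events containing the fraction WeightRange of the total
/// weight when that option is below 1.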
432 
433 ////////////////////////////////////////////////////////////////////////////////
434 
435 void TMVA::MethodMLP::Train(Int_t nEpochs)
436 {
437  if (fNetwork == 0) {
438  //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
439  Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
441  }
442  Log() << kDEBUG << "reinitialize learning rates" << Endl;
443  InitializeLearningRates();
444  Log() << kHEADER;
445  PrintMessage("Training Network");
446  Log() << Endl;
447  Int_t nEvents=GetNEvents();
448  Int_t nSynapses=fSynapses->GetEntriesFast();
449  if (nSynapses>nEvents)
450  Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
451 
452  fIPyMaxIter = nEpochs;
453  if (fInteractive && fInteractive->NotInitialized()){
454  std::vector<TString> titles = {"Error on training set", "Error on test set"};
455  fInteractive->Init(titles);
456  }
457 
458 #ifdef MethodMLP_UseMinuit__
459  if (useMinuit) MinuitMinimize();
460 #else
461  if (fTrainingMethod == kGA) GeneticMinimize();
462  else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
463  else BackPropagationMinimize(nEpochs);
464 #endif
465 
466  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
467  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
468  if (fUseRegulator){
469  Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
470  UpdateRegulators();
471  Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
472  }
473 
474  if( fCalculateErrors || fUseRegulator )
475  {
476  Int_t numSynapses=fSynapses->GetEntriesFast();
477  fInvHessian.ResizeTo(numSynapses,numSynapses);
478  GetApproxInvHessian( fInvHessian );
479  }
481 }
482 
483 ////////////////////////////////////////////////////////////////////////////////
484 /// train network with BFGS algorithm
485 
486 void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
487 {
488  Timer timer( (fSteps>0?100:nEpochs), GetName() );
489 
490  // create histograms for overtraining monitoring
491  Int_t nbinTest = Int_t(nEpochs/fTestRate);
492  if(!IsSilentFile())
493  {
494  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
495  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
496  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
497  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
498  }
499 
500  Int_t nSynapses = fSynapses->GetEntriesFast();
501  Int_t nWeights = nSynapses;
502 
503  for (Int_t i=0;i<nSynapses;i++) {
504  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
505  synapse->SetDEDw(0.0);
506  }
507 
508  std::vector<Double_t> buffer( nWeights );
509  for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
510 
511  TMatrixD Dir ( nWeights, 1 );
512  TMatrixD Hessian ( nWeights, nWeights );
513  TMatrixD Gamma ( nWeights, 1 );
514  TMatrixD Delta ( nWeights, 1 );
515  Int_t RegUpdateCD=0; //zjh
516  Int_t RegUpdateTimes=0; //zjh
517  Double_t AccuError=0;
518 
519  Double_t trainE = -1;
520  Double_t testE = -1;
521 
522  fLastAlpha = 0.;
523 
524  if (fSamplingTraining || fSamplingTesting)
525  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
526 
527  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
528  timer.DrawProgressBar( 0 );
529 
530  // start training cycles (epochs)
531  for (Int_t i = 0; i < nEpochs; i++) {
532 
533  if (fExitFromTraining) break;
534  fIPyCurrentIter = i;
535  if (Float_t(i)/nEpochs < fSamplingEpoch) {
536  if ((i+1)%fTestRate == 0 || (i == 0)) {
537  if (fSamplingTraining) {
538  Data()->SetCurrentType( Types::kTraining );
539  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
540  Data()->CreateSampling();
541  }
542  if (fSamplingTesting) {
543  Data()->SetCurrentType( Types::kTesting );
544  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
545  Data()->CreateSampling();
546  }
547  }
548  }
549  else {
550  Data()->SetCurrentType( Types::kTraining );
551  Data()->InitSampling(1.0,1.0);
552  Data()->SetCurrentType( Types::kTesting );
553  Data()->InitSampling(1.0,1.0);
554  }
555  Data()->SetCurrentType( Types::kTraining );
556 
557  //zjh
558  if (fUseRegulator) {
559  UpdatePriors();
560  RegUpdateCD++;
561  }
562  //zjh
563 
564  SetGammaDelta( Gamma, Delta, buffer );
565 
566  if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
567  SteepestDir( Dir );
568  Hessian.UnitMatrix();
569  RegUpdateCD=0; //zjh
570  }
571  else {
572  if (GetHessian( Hessian, Gamma, Delta )) {
573  SteepestDir( Dir );
574  Hessian.UnitMatrix();
575  RegUpdateCD=0; //zjh
576  }
577  else SetDir( Hessian, Dir );
578  }
579 
580  Double_t dError=0; //zjh
581  if (DerivDir( Dir ) > 0) {
582  SteepestDir( Dir );
583  Hessian.UnitMatrix();
584  RegUpdateCD=0; //zjh
585  }
586  if (LineSearch( Dir, buffer, &dError )) { //zjh
587  Hessian.UnitMatrix();
588  SteepestDir( Dir );
589  RegUpdateCD=0; //zjh
590  if (LineSearch(Dir, buffer, &dError)) { //zjh
591  i = nEpochs;
592  Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
593  }
594  }
595 
596  //zjh+
597  if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
598  AccuError+=dError;
599 
600  if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
601  Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
603  Hessian.UnitMatrix();
604  RegUpdateCD=0;
605  RegUpdateTimes++;
606  AccuError=0;
607  }
608  //zjh-
609 
610  // monitor convergence of training and control sample
611  if ((i+1)%fTestRate == 0) {
612  //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
613  //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
614  trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
615  testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
616  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
617  if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
618  {
619  fEstimatorHistTrain->Fill( i+1, trainE );
620  fEstimatorHistTest ->Fill( i+1, testE );
621  }
622  Bool_t success = kFALSE;
623  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
624  success = kTRUE;
625  }
626  Data()->EventResult( success );
627 
628  SetCurrentValue( testE );
629  if (HasConverged()) {
630  if (Float_t(i)/nEpochs < fSamplingEpoch) {
631  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
632  i = newEpoch;
633  ResetConvergenceCounter();
634  }
635  else break;
636  }
637  }
638 
639  // draw progress
640  TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
641  if (fSteps > 0) {
642  Float_t progress = 0;
643  if (Float_t(i)/nEpochs < fSamplingEpoch)
644  // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
645  progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
646  else
647  {
648  // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
649  progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
650  }
651  Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
652  if (progress2>progress) progress=progress2; //zjh
653  timer.DrawProgressBar( Int_t(progress), convText );
654  }
655  else {
656  Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
657  if (progress<i) progress=i; //zjh
658  timer.DrawProgressBar( progress, convText ); //zjh
659  }
660 
661  // some verbose output
662  if (fgPRINT_SEQ) {
663  PrintNetwork();
664  WaitForKeyboard();
665  }
666  }
667 }
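////////////////////////////////////////////////////////////////////////////////
/// In summary, each epoch of the loop above performs one quasi-Newton step:
/// with the gradient \f$ g = \partial E/\partial w \f$ from ComputeDEDw(),
///
/// \f[
///    \gamma = g_\mathrm{new} - g_\mathrm{old}, \qquad
///    \delta = w_\mathrm{new} - w_\mathrm{old}, \qquad
///    D = -H\,g, \qquad
///    w \;\leftarrow\; w + \alpha\, D ,
/// \f]
///
/// where H is the approximate inverse Hessian (reset to the unit matrix every
/// ResetStep epochs or whenever the update or line search fails) and the step
/// length \f$\alpha\f$ is determined by LineSearch().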
668 
669 ////////////////////////////////////////////////////////////////////////////////
670 
671 void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
672 {
673  Int_t nWeights = fSynapses->GetEntriesFast();
674 
675  Int_t IDX = 0;
676  Int_t nSynapses = fSynapses->GetEntriesFast();
677  for (Int_t i=0;i<nSynapses;i++) {
678  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
679  Gamma[IDX++][0] = -synapse->GetDEDw();
680  }
681 
682  for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
683 
684  ComputeDEDw();
685 
686  IDX = 0;
687  for (Int_t i=0;i<nSynapses;i++)
688  {
689  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
690  Gamma[IDX++][0] += synapse->GetDEDw();
691  }
692 }
693 
694 ////////////////////////////////////////////////////////////////////////////////
695 
696 void TMVA::MethodMLP::ComputeDEDw()
697 {
698  Int_t nSynapses = fSynapses->GetEntriesFast();
699  for (Int_t i=0;i<nSynapses;i++) {
700  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
701  synapse->SetDEDw( 0.0 );
702  }
703 
704  Int_t nEvents = GetNEvents();
705  Int_t nPosEvents = nEvents;
706  for (Int_t i=0;i<nEvents;i++) {
707 
708  const Event* ev = GetEvent(i);
709  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
710  && (Data()->GetCurrentType() == Types::kTraining)){
711  --nPosEvents;
712  continue;
713  }
714 
715  SimulateEvent( ev );
716 
717  for (Int_t j=0;j<nSynapses;j++) {
718  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
719  synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
720  }
721  }
722 
723  for (Int_t i=0;i<nSynapses;i++) {
724  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
725  Double_t DEDw=synapse->GetDEDw(); //zjh
726  if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
727  synapse->SetDEDw( DEDw / nPosEvents ); //zjh
728  }
729 }
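////////////////////////////////////////////////////////////////////////////////
/// The gradient stored per synapse above is the event-averaged derivative of
/// the error, shifted by the regulator prior when it is in use:
///
/// \f[
///    \frac{\partial E}{\partial w_j} \;=\;
///    \frac{1}{N^{+}} \Bigl[ \sum_{\mathrm{events}} \Delta_j
///    \;+\; \alpha_{r(j)}\, w_j \Bigr] ,
/// \f]
///
/// where \f$\Delta_j\f$ is the synapse delta computed in SimulateEvent(),
/// \f$N^{+}\f$ the number of retained (non-negative-weight) events and
/// \f$\alpha_{r(j)} w_j\f$ the derivative of the prior
/// \f$ \tfrac{1}{2}\sum_j \alpha_{r(j)} w_j^2 \f$ from UpdatePriors().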
730 
731 ////////////////////////////////////////////////////////////////////////////////
732 
733 void TMVA::MethodMLP::SimulateEvent( const Event* ev )
734 {
735  Double_t eventWeight = ev->GetWeight();
736 
737  ForceNetworkInputs( ev );
738  ForceNetworkCalculations();
739 
740  if (DoRegression()) {
741  UInt_t ntgt = DataInfo().GetNTargets();
742  for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
743  Double_t desired = ev->GetTarget(itgt);
744  Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
745  GetOutputNeuron( itgt )->SetError(error);
746  }
747  } else if (DoMulticlass()) {
748  UInt_t nClasses = DataInfo().GetNClasses();
749  UInt_t cls = ev->GetClass();
750  for (UInt_t icls = 0; icls < nClasses; icls++) {
751  Double_t desired = ( cls==icls ? 1.0 : 0.0 );
752  Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
753  GetOutputNeuron( icls )->SetError(error);
754  }
755  } else {
756  Double_t desired = GetDesiredOutput( ev );
757  Double_t error=-1; //zjh
758  if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
759  else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
760  GetOutputNeuron()->SetError(error);
761  }
762 
763  CalculateNeuronDeltas();
764  for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
765  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
766  synapse->InitDelta();
767  synapse->CalculateDelta();
768  }
769 }
770 
771 ////////////////////////////////////////////////////////////////////////////////
772 
773 void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
774 {
775  Int_t IDX = 0;
776  Int_t nSynapses = fSynapses->GetEntriesFast();
777 
778  for (Int_t i=0;i<nSynapses;i++) {
779  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
780  Dir[IDX++][0] = -synapse->GetDEDw();
781  }
782 }
783 
784 ////////////////////////////////////////////////////////////////////////////////
785 
786 Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
787 {
788  TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
789  if ((Double_t) gd[0][0] == 0.) return kTRUE;
790  TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
791  TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
792  TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
793  Double_t a = 1 / (Double_t) gd[0][0];
794  Double_t f = 1 + ((Double_t)gHg[0][0]*a);
795  TMatrixD res( Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed, Delta) );
796  res *= f;
797  res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
798  TMatrixD(TMatrixD::kTransposed, Delta)));
799  res *= a;
800  Hessian += res;
801 
802  return kFALSE;
803 }
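////////////////////////////////////////////////////////////////////////////////
/// The rank-two update applied above is the BFGS update of the approximate
/// inverse Hessian, with \f$\gamma\f$ the change of the gradient and
/// \f$\delta\f$ the change of the weights between iterations:
///
/// \f[
///    H \;\leftarrow\; H \;+\; \frac{1}{\gamma^{T}\delta}
///    \left[ \Bigl( 1 + \frac{\gamma^{T} H \gamma}{\gamma^{T}\delta} \Bigr)
///    \,\delta\delta^{T} \;-\; \delta\,(\gamma^{T}H) \;-\; (H\gamma)\,\delta^{T}
///    \right] .
/// \f]
///
/// When \f$\gamma^{T}\delta = 0\f$ the update is undefined; kTRUE is returned
/// and the caller falls back to the steepest-descent direction.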
804 
805 ////////////////////////////////////////////////////////////////////////////////
806 
807 void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
808 {
809  Int_t IDX = 0;
810  Int_t nSynapses = fSynapses->GetEntriesFast();
811  TMatrixD DEDw(nSynapses, 1);
812 
813  for (Int_t i=0;i<nSynapses;i++) {
814  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
815  DEDw[IDX++][0] = synapse->GetDEDw();
816  }
817 
818  dir = Hessian * DEDw;
819  for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
820 }
821 
822 ////////////////////////////////////////////////////////////////////////////////
823 
824 Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
825 {
826  Int_t IDX = 0;
827  Int_t nSynapses = fSynapses->GetEntriesFast();
828  Double_t Result = 0.0;
829 
830  for (Int_t i=0;i<nSynapses;i++) {
831  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
832  Result += Dir[IDX++][0] * synapse->GetDEDw();
833  }
834  return Result;
835 }
836 
837 ////////////////////////////////////////////////////////////////////////////////
838 
839 Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
840 {
841  Int_t IDX = 0;
842  Int_t nSynapses = fSynapses->GetEntriesFast();
843  Int_t nWeights = nSynapses;
844 
845  std::vector<Double_t> Origin(nWeights);
846  for (Int_t i=0;i<nSynapses;i++) {
847  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
848  Origin[i] = synapse->GetWeight();
849  }
850 
851  Double_t err1 = GetError();
852  Double_t errOrigin=err1;//zjh
853  Double_t alpha1 = 0.;
854  Double_t alpha2 = fLastAlpha;
855 
856 
857  if (alpha2 < 0.01) alpha2 = 0.01;
858  else if (alpha2 > 2.0) alpha2 = 2.0;
859  Double_t alpha_original = alpha2;
860  Double_t alpha3 = alpha2;
861 
862  SetDirWeights( Origin, Dir, alpha2 );
863  Double_t err2 = GetError();
864  //Double_t err2 = err1;
865  Double_t err3 = err2;
866  Bool_t bingo = kFALSE;
867 
868 
869  if (err1 > err2) {
870  for (Int_t i=0;i<100;i++) {
871  alpha3 *= fTau;
872  SetDirWeights(Origin, Dir, alpha3);
873  err3 = GetError();
874  if (err3 > err2) {
875  bingo = kTRUE;
876  break;
877  }
878  alpha1 = alpha2;
879  err1 = err2;
880  alpha2 = alpha3;
881  err2 = err3;
882  }
883  if (!bingo) {
884  SetDirWeights(Origin, Dir, 0.);
885  return kTRUE;
886  }
887  }
888  else {
889  for (Int_t i=0;i<100;i++) {
890  alpha2 /= fTau;
891  if (i==50) {
892  Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
893  alpha2 = -alpha_original;
894  }
895  SetDirWeights(Origin, Dir, alpha2);
896  err2 = GetError();
897  if (err1 > err2) {
898  bingo = kTRUE;
899  break;
900  }
901  alpha3 = alpha2;
902  err3 = err2;
903  }
904  if (!bingo) {
905  SetDirWeights(Origin, Dir, 0.);
906  Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
907  fLastAlpha = 0.05;
908  return kTRUE;
909  }
910  }
911 
912  if (alpha1>0 && alpha2>0 && alpha3 > 0) {
913  fLastAlpha = 0.5 * (alpha1 + alpha3 -
914  (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
915  - ( err2 - err1 ) / (alpha2 - alpha1 )));
916  }
917  else {
918  fLastAlpha = alpha2;
919  }
920 
921  fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
922 
923  SetDirWeights(Origin, Dir, fLastAlpha);
924 
925  // leaving these lines uncommented is a heavy price to pay for only a warning message
926  // (which shouldn't appear anyway)
927  // --> about 15% of time is spent in the final GetError().
928  //
929  Double_t finalError = GetError();
930  if (finalError > err1) {
931  Log() << kWARNING << "Line search increased error! Something is wrong."
932  << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
933  << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
934  }
935 
936  for (Int_t i=0;i<nSynapses;i++) {
937  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
938  buffer[IDX] = synapse->GetWeight() - Origin[IDX];
939  IDX++;
940  }
941 
942  if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
943 
944  return kFALSE;
945 }
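////////////////////////////////////////////////////////////////////////////////
/// Once three points \f$(\alpha_1,e_1)\f$, \f$(\alpha_2,e_2)\f$,
/// \f$(\alpha_3,e_3)\f$ bracketing the minimum along the search direction have
/// been found above, the step length is taken at the minimum of the
/// interpolating parabola,
///
/// \f[
///    \alpha_\mathrm{last} = \frac{1}{2}\left[ \alpha_1 + \alpha_3 -
///    \frac{e_3 - e_1}{\dfrac{e_3-e_2}{\alpha_3-\alpha_2}
///       - \dfrac{e_2-e_1}{\alpha_2-\alpha_1}} \right] ,
/// \f]
///
/// capped at 10000, and the weights are moved to
/// \f$ w = w_0 + \alpha_\mathrm{last}\,D \f$ via SetDirWeights().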
946 
947 ////////////////////////////////////////////////////////////////////////////////
948 
949 void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
950 {
951  Int_t IDX = 0;
952  Int_t nSynapses = fSynapses->GetEntriesFast();
953 
954  for (Int_t i=0;i<nSynapses;i++) {
955  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
956  synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
957  IDX++;
958  }
959  if (fUseRegulator) UpdatePriors();//zjh
960 }
961 
962 
963 ////////////////////////////////////////////////////////////////////////////////
964 
965 Double_t TMVA::MethodMLP::GetError()
966 {
967  Int_t nEvents = GetNEvents();
968  UInt_t ntgts = GetNTargets();
969  Double_t Result = 0.;
970 
971  for (Int_t i=0;i<nEvents;i++) {
972  const Event* ev = GetEvent(i);
973 
974  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
975  && (Data()->GetCurrentType() == Types::kTraining)){
976  continue;
977  }
978  SimulateEvent( ev );
979 
980  Double_t error = 0.;
981  if (DoRegression()) {
982  for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
983  error += GetMSEErr( ev, itgt );//zjh
984  }
985  } else if ( DoMulticlass() ){
986  for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
987  error += GetMSEErr( ev, icls );
988  }
989  } else {
990  if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
991  else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
992  }
993  Result += error * ev->GetWeight();
994  }
995  if (fUseRegulator) Result+=fPrior; //zjh
996  if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
997  return Result;
998 }
999 
1000 ////////////////////////////////////////////////////////////////////////////////
1001 
1002 Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
1003 {
1004  Double_t error = 0;
1005  Double_t output = GetOutputNeuron( index )->GetActivationValue();
1006  Double_t target = 0;
1007  if (DoRegression()) target = ev->GetTarget( index );
1008  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1009  else target = GetDesiredOutput( ev );
1010 
1011  error = 0.5*(output-target)*(output-target); //zjh
1012 
1013  return error;
1014 
1015 }
1016 
1017 ////////////////////////////////////////////////////////////////////////////////
1018 
1019 Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
1020 {
1021  Double_t error = 0;
1022  Double_t output = GetOutputNeuron( index )->GetActivationValue();
1023  Double_t target = 0;
1024  if (DoRegression()) target = ev->GetTarget( index );
1025  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1026  else target = GetDesiredOutput( ev );
1027 
1028  error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1029 
1030  return error;
1031 }
1032 
1033 ////////////////////////////////////////////////////////////////////////////////
1034 /// minimize estimator / train network with backpropagation algorithm
1035 
1036 void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
1037 {
1038  // Timer timer( nEpochs, GetName() );
1039  Timer timer( (fSteps>0?100:nEpochs), GetName() );
1040  Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1041 
1042  // create histograms for overtraining monitoring
1043  Int_t nbinTest = Int_t(nEpochs/fTestRate);
1044  if(!IsSilentFile())
1045  {
1046  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1047  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1048  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1049  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1050  }
1051  if (fSamplingTraining || fSamplingTesting)
1052  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1053 
1054  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1055  timer.DrawProgressBar(0);
1056 
1057  // estimators
1058  Double_t trainE = -1;
1059  Double_t testE = -1;
1060 
1061  // start training cycles (epochs)
1062  for (Int_t i = 0; i < nEpochs; i++) {
1063 
1064  if (fExitFromTraining) break;
1065  fIPyCurrentIter = i;
1066  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1067  if ((i+1)%fTestRate == 0 || (i == 0)) {
1068  if (fSamplingTraining) {
1069  Data()->SetCurrentType( Types::kTraining );
1070  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1071  Data()->CreateSampling();
1072  }
1073  if (fSamplingTesting) {
1074  Data()->SetCurrentType( Types::kTesting );
1075  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1076  Data()->CreateSampling();
1077  }
1078  }
1079  }
1080  else {
1081  Data()->SetCurrentType( Types::kTraining );
1082  Data()->InitSampling(1.0,1.0);
1083  Data()->SetCurrentType( Types::kTesting );
1084  Data()->InitSampling(1.0,1.0);
1085  }
1086  Data()->SetCurrentType( Types::kTraining );
1087 
1088  TrainOneEpoch();
1089  DecaySynapseWeights(i >= lateEpoch);
1090 
1091  // monitor convergence of training and control sample
1092  if ((i+1)%fTestRate == 0) {
1093  trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1094  testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1095  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1096  if(!IsSilentFile())
1097  {
1098  fEstimatorHistTrain->Fill( i+1, trainE );
1099  fEstimatorHistTest ->Fill( i+1, testE );
1100  }
1101  Bool_t success = kFALSE;
1102  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1103  success = kTRUE;
1104  }
1105  Data()->EventResult( success );
1106 
1107  SetCurrentValue( testE );
1108  if (HasConverged()) {
1109  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1110  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1111  i = newEpoch;
1112  ResetConvergenceCounter();
1113  }
1114  else {
1115  if (lateEpoch > i) lateEpoch = i;
1116  else break;
1117  }
1118  }
1119  }
1120 
1121  // draw progress bar (add convergence value)
1122  TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1123  if (fSteps > 0) {
1124  Float_t progress = 0;
1125  if (Float_t(i)/nEpochs < fSamplingEpoch)
1126  progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1127  else
1128  progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1129 
1130  timer.DrawProgressBar( Int_t(progress), convText );
1131  }
1132  else {
1133  timer.DrawProgressBar( i, convText );
1134  }
1135  }
1136 }
1137 
1138 ////////////////////////////////////////////////////////////////////////////////
1139 /// train network over a single epoch/cycle of events
1140 
1141 void TMVA::MethodMLP::TrainOneEpoch()
1142 {
1143  Int_t nEvents = Data()->GetNEvents();
1144 
1145  // randomize the order events will be presented, important for sequential mode
1146  Int_t* index = new Int_t[nEvents];
1147  for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1148  Shuffle(index, nEvents);
1149 
1150  // loop over all training events
1151  for (Int_t i = 0; i < nEvents; i++) {
1152 
1153  const Event * ev = GetEvent(index[i]);
1154  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1155  && (Data()->GetCurrentType() == Types::kTraining)){
1156  continue;
1157  }
1158 
1159  TrainOneEvent(index[i]);
1160 
1161  // do adjustments if in batch mode
1162  if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1163  AdjustSynapseWeights();
1164  if (fgPRINT_BATCH) {
1165  PrintNetwork();
1166  WaitForKeyboard();
1167  }
1168  }
1169 
1170  // debug in sequential mode
1171  if (fgPRINT_SEQ) {
1172  PrintNetwork();
1173  WaitForKeyboard();
1174  }
1175  }
1176 
1177  delete[] index;
1178 }
1179 
1180 ////////////////////////////////////////////////////////////////////////////////
1181 /// Input:
1182 /// index: the array to shuffle
1183 /// n: the size of the array
1184 /// Output:
1185 /// index: the shuffled indexes
1186 /// This method is used for sequential training
1187 
1188 void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
1189 {
1190  Int_t j, k;
1191  Int_t a = n - 1;
1192  for (Int_t i = 0; i < n; i++) {
1193  j = (Int_t) (frgen->Rndm() * a);
1194  if (j<n){ // address the 'worries' of coverity
1195  k = index[j];
1196  index[j] = index[i];
1197  index[i] = k;
1198  }
1199  }
1200 }
1201 
1202 ////////////////////////////////////////////////////////////////////////////////
1203 /// decay synapse weights
1204 /// in the last epochs (the final ~5%, see BackPropagationMinimize), lower the learning rate even more to find a good minimum
1205 
1206 void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
1207 {
1208  TSynapse* synapse;
1209  Int_t numSynapses = fSynapses->GetEntriesFast();
1210  for (Int_t i = 0; i < numSynapses; i++) {
1211  synapse = (TSynapse*)fSynapses->At(i);
1212  if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1213  else synapse->DecayLearningRate(fDecayRate);
1214  }
1215 }
1216 
1217 ////////////////////////////////////////////////////////////////////////////////
1218 /// fast per-event training
1219 
1220 void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
1221 {
1222  GetEvent(ievt);
1223 
1224  // as soon as we know how to get event weights, get that here
1225 
1226  // note: the normalization of event weights will affect the choice
1227  // of learning rate, one will have to experiment to get the right value.
1228  // in general, if the "average" event weight is 1, the learning rate
1229  // should be good if set around 0.02 (a good value if all event weights are 1)
1230  Double_t eventWeight = 1.0;
1231 
1232  // get the desired output of this event
1233  Double_t desired;
1234  if (type == 0) desired = fOutput->GetMin(); // background //zjh
1235  else desired = fOutput->GetMax(); // signal //zjh
1236 
1237  // force the value for each input neuron
1238  Double_t x;
1239  TNeuron* neuron;
1240 
1241  for (UInt_t j = 0; j < GetNvar(); j++) {
1242  x = branchVar[j];
1243  if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1244  neuron = GetInputNeuron(j);
1245  neuron->ForceValue(x);
1246  }
1247 
1248  ForceNetworkCalculations();
1249  UpdateNetwork(desired, eventWeight);
1250 }
1251 
1252 ////////////////////////////////////////////////////////////////////////////////
1253 /// train network over a single event
1254 /// this uses the new event model
1255 
1256 void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
1257 {
1258  // note: the normalization of event weights will affect the choice
1259  // of learning rate, one will have to experiment to get the right value.
1260  // in general, if the "average" event weight is 1, the learning rate
1261  // should be good if set around 0.02 (a good value if all event weights are 1)
1262 
1263  const Event * ev = GetEvent(ievt);
1264  Double_t eventWeight = ev->GetWeight();
1265  ForceNetworkInputs( ev );
1266  ForceNetworkCalculations();
1267  if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1268  else if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1269  else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1270 }
1271 
1272 ////////////////////////////////////////////////////////////////////////////////
1273 /// get the desired output of this event
1274 
1275 Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
1276 {
1277  return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1278 }
1279 
1280 
1281 ////////////////////////////////////////////////////////////////////////////////
1282 /// update the network based on how closely
1283 /// the output matched the desired output
1284 
1285 void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
1286 {
1287  Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1288  if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1289  else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1290  else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1291  error *= eventWeight;
1292  GetOutputNeuron()->SetError(error);
1293  CalculateNeuronDeltas();
1294  UpdateSynapses();
1295 }
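////////////////////////////////////////////////////////////////////////////////
/// The error attached to the output neuron above is the derivative of the
/// per-event loss with respect to the output activation \f$a\f$, for desired
/// output \f$ y \in \{0,1\} \f$:
///
/// \f[
///    \mathrm{MSE:}\quad \frac{\partial E}{\partial a} = a - y , \qquad
///    \mathrm{CE:}\quad  \frac{\partial E}{\partial a}
///       = -\frac{y}{a} + \frac{1-y}{1-a} = -\frac{1}{\,a - 1 + y\,} ,
/// \f]
///
/// scaled by the event weight before the deltas are backpropagated through the
/// network.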
1296 
1297 ////////////////////////////////////////////////////////////////////////////////
1298 /// update the network based on how closely
1299 /// the output matched the desired output
1300 
1301 void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1302 {
1303  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1304  Double_t error = GetOutputNeuron( i )->GetActivationValue() - desired.at(i);
1305  error *= eventWeight;
1306  GetOutputNeuron( i )->SetError(error);
1307  }
1308  CalculateNeuronDeltas();
1309  UpdateSynapses();
1310 }
1311 
1312 
1313 ////////////////////////////////////////////////////////////////////////////////
1314 /// have each neuron calculate its delta by backpropagation
1315 
1316 void TMVA::MethodMLP::CalculateNeuronDeltas()
1317 {
1318  TNeuron* neuron;
1319  Int_t numNeurons;
1320  Int_t numLayers = fNetwork->GetEntriesFast();
1321  TObjArray* curLayer;
1322 
1323  // step backwards through the network (backpropagation)
1324  // deltas calculated starting at output layer
1325  for (Int_t i = numLayers-1; i >= 0; i--) {
1326  curLayer = (TObjArray*)fNetwork->At(i);
1327  numNeurons = curLayer->GetEntriesFast();
1328 
1329  for (Int_t j = 0; j < numNeurons; j++) {
1330  neuron = (TNeuron*) curLayer->At(j);
1331  neuron->CalculateDelta();
1332  }
1333  }
1334 }
1335 
1336 ////////////////////////////////////////////////////////////////////////////////
1337 /// create genetics class similar to GeneticCut
1338 /// give it vector of parameter ranges (parameters = weights)
1339 /// link fitness function of this class to ComputeEstimator
1340 /// instantiate GA (see MethodCuts)
1341 /// run it
1342 /// then this should exist for GA, Minuit and random sampling
1343 
1344 void TMVA::MethodMLP::GeneticMinimize()
1345 {
1346  PrintMessage("Minimizing Estimator with GA");
1347 
1348  // define GA parameters
1349  fGA_preCalc = 1;
1350  fGA_SC_steps = 10;
1351  fGA_SC_rate = 5;
1352  fGA_SC_factor = 0.95;
1353  fGA_nsteps = 30;
1354 
1355  // ranges
1356  std::vector<Interval*> ranges;
1357 
1358  Int_t numWeights = fSynapses->GetEntriesFast();
1359  for (Int_t ivar=0; ivar< numWeights; ivar++) {
1360  ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1361  }
1362 
1363  FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1364  gf->Run();
1365 
1366  Double_t estimator = CalculateEstimator();
1367  Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1368 }
1369 
1370 ////////////////////////////////////////////////////////////////////////////////
1371 /// interface to the estimate
1372 
1373 Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1374 {
1375  return ComputeEstimator( parameters );
1376 }
1377 
1378 ////////////////////////////////////////////////////////////////////////////////
1379 /// this function is called by GeneticANN for GA optimization
1380 
1381 Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1382 {
1383  TSynapse* synapse;
1384  Int_t numSynapses = fSynapses->GetEntriesFast();
1385 
1386  for (Int_t i = 0; i < numSynapses; i++) {
1387  synapse = (TSynapse*)fSynapses->At(i);
1388  synapse->SetWeight(parameters.at(i));
1389  }
1390  if (fUseRegulator) UpdatePriors(); //zjh
1391 
1392  Double_t estimator = CalculateEstimator();
1393 
1394  return estimator;
1395 }
1396 
1397 ////////////////////////////////////////////////////////////////////////////////
1398 /// update synapse error fields and adjust the weights (if in sequential mode)
1399 
1400 void TMVA::MethodMLP::UpdateSynapses()
1401 {
1402  TNeuron* neuron;
1403  Int_t numNeurons;
1404  TObjArray* curLayer;
1405  Int_t numLayers = fNetwork->GetEntriesFast();
1406 
1407  for (Int_t i = 0; i < numLayers; i++) {
1408  curLayer = (TObjArray*)fNetwork->At(i);
1409  numNeurons = curLayer->GetEntriesFast();
1410 
1411  for (Int_t j = 0; j < numNeurons; j++) {
1412  neuron = (TNeuron*) curLayer->At(j);
1413  if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1414  else neuron->UpdateSynapsesSequential();
1415  }
1416  }
1417 }
1418 
1419 ////////////////////////////////////////////////////////////////////////////////
1420 /// just adjust the synapse weights (should be called in batch mode)
1421 
1422 void TMVA::MethodMLP::AdjustSynapseWeights()
1423 {
1424  TNeuron* neuron;
1425  Int_t numNeurons;
1426  TObjArray* curLayer;
1427  Int_t numLayers = fNetwork->GetEntriesFast();
1428 
1429  for (Int_t i = numLayers-1; i >= 0; i--) {
1430  curLayer = (TObjArray*)fNetwork->At(i);
1431  numNeurons = curLayer->GetEntriesFast();
1432 
1433  for (Int_t j = 0; j < numNeurons; j++) {
1434  neuron = (TNeuron*) curLayer->At(j);
1435  neuron->AdjustSynapseWeights();
1436  }
1437  }
1438 }
1439 
1440 ////////////////////////////////////////////////////////////////////////////////
1441 
1442 void TMVA::MethodMLP::UpdatePriors()
1443 {
1444  fPrior=0;
1445  fPriorDev.clear();
1446  Int_t nSynapses = fSynapses->GetEntriesFast();
1447  for (Int_t i=0;i<nSynapses;i++) {
1448  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1449  fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1450  fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1451  }
1452 }
1453 
1454 ////////////////////////////////////////////////////////////////////////////////
1455 
1456 void TMVA::MethodMLP::UpdateRegulators()
1457 {
1458  TMatrixD InvH(0,0);
1459  GetApproxInvHessian(InvH);
1460  Int_t numSynapses=fSynapses->GetEntriesFast();
1461  Int_t numRegulators=fRegulators.size();
1462  Float_t gamma=0,
1463  variance=1.; // Gaussian noise
1464  std::vector<Int_t> nWDP(numRegulators);
1465  std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1466  for (int i=0;i<numSynapses;i++) {
1467  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1468  Int_t idx=fRegulatorIdx[i];
1469  nWDP[idx]++;
1470  trace[idx]+=InvH[i][i];
1471  gamma+=1-fRegulators[idx]*InvH[i][i];
1472  weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1473  }
1474  if (fEstimator==kMSE) {
1475  if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1476  else variance=CalculateEstimator( Types::kTraining, 0 );
1477  }
1478 
1479  //Log() << kDEBUG << Endl;
1480  for (int i=0;i<numRegulators;i++)
1481  {
1482  //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1483  fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1484  if (fRegulators[i]<0) fRegulators[i]=0;
1485  Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1486  }
1487  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1488  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1489 
1490  Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1491 
1492 }
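////////////////////////////////////////////////////////////////////////////////
/// The regulator coefficients updated above follow an evidence-approximation
/// (Bayesian weight-decay) re-estimation: with noise variance \f$\sigma^2\f$
/// taken from the training estimator, \f$n_i\f$ the number of weights in
/// regulator class i, \f$ W_i = \sum w^2 \f$ their squared sum and
/// \f$ T_i = \sum_j (H^{-1})_{jj} \f$ the corresponding trace of the
/// approximate inverse Hessian,
///
/// \f[
///    \alpha_i \;\leftarrow\; \frac{\sigma^2\, n_i}{\,W_i + \sigma^2\, T_i\,} ,
///    \qquad \alpha_i \geq 0 .
/// \f]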
1493 
1494 ////////////////////////////////////////////////////////////////////////////////
1495 
1496 void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1497 {
1498  Int_t numSynapses=fSynapses->GetEntriesFast();
1499  InvHessian.ResizeTo( numSynapses, numSynapses );
1500  InvHessian=0;
1501  TMatrixD sens(numSynapses,1);
1502  TMatrixD sensT(1,numSynapses);
1503  Int_t nEvents = GetNEvents();
1504  for (Int_t i=0;i<nEvents;i++) {
1505  GetEvent(i);
1506  double outputValue=GetMvaValue(); // force calculation
1509  for (Int_t j = 0; j < numSynapses; j++){
1510  TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1511  synapses->InitDelta();
1512  synapses->CalculateDelta();
1513  sens[j][0]=sensT[0][j]=synapses->GetDelta();
1514  }
1515  if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1516  else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1517  }
1518 
1519  // TVectorD eValue(numSynapses);
1520  if (regulate) {
1521  for (Int_t i = 0; i < numSynapses; i++){
1522  InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1523  }
1524  }
1525  else {
1526  for (Int_t i = 0; i < numSynapses; i++){
1527  InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1528  }
1529  }
1530 
1531  InvHessian.Invert();
1532 
1533 }
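////////////////////////////////////////////////////////////////////////////////
/// The matrix inverted above is an outer-product (Gauss-Newton-like)
/// approximation of the Hessian built from the per-event sensitivities
/// \f$ s_j = \partial a/\partial w_j \f$ (the synapse deltas):
///
/// \f[
///    \mathcal{H} \;\approx\; \sum_{\mathrm{events}} c\; s\, s^{T}
///    \;+\; \mathrm{diag}\bigl(\alpha_{r(j)}\bigr) ,
/// \f]
///
/// with \f$ c = 1 \f$ for the MSE estimator and \f$ c = a(1-a) \f$ for the
/// cross-entropy estimator; the regulators (or a small constant \f$10^{-6}\f$
/// when regularisation is off) are added on the diagonal to keep the matrix
/// positive definite before inversion.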
1534 
1535 ////////////////////////////////////////////////////////////////////////////////
1536 
1537 Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
1538 {
1539  Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1540 
1541  // no hessian (old training file) or no error requested
1542  if (!fCalculateErrors || errLower==0 || errUpper==0)
1543  return MvaValue;
1544 
1545  Double_t MvaUpper,MvaLower,median,variance;
1546  Int_t numSynapses=fSynapses->GetEntriesFast();
1547  if (fInvHessian.GetNcols()!=numSynapses) {
1548  Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1549  }
1550  TMatrixD sens(numSynapses,1);
1551  TMatrixD sensT(1,numSynapses);
1553  //GetOutputNeuron()->SetError(1.);
1555  for (Int_t i = 0; i < numSynapses; i++){
1556  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1557  synapses->InitDelta();
1558  synapses->CalculateDelta();
1559  sensT[0][i]=synapses->GetDelta();
1560  }
1561  sens.Transpose(sensT);
1562  TMatrixD sig=sensT*fInvHessian*sens;
1563  variance=sig[0][0];
1564  median=GetOutputNeuron()->GetValue();
1565 
1566  if (variance<0) {
1567  Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1568  variance=0;
1569  }
1570  variance=sqrt(variance);
1571 
1572  //upper
1573  MvaUpper=fOutput->Eval(median+variance);
1574  if(errUpper)
1575  *errUpper=MvaUpper-MvaValue;
1576 
1577  //lower
1578  MvaLower=fOutput->Eval(median-variance);
1579  if(errLower)
1580  *errLower=MvaValue-MvaLower;
1581 
1582  return MvaValue;
1583 }
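////////////////////////////////////////////////////////////////////////////////
/// The asymmetric uncertainties returned above come from linear error
/// propagation through the approximate inverse Hessian: with sensitivities
/// \f$ s_j = \partial a/\partial w_j \f$ and output-neuron value \f$ m \f$,
///
/// \f[
///    \sigma^2 = s^{T} H^{-1} s , \qquad
///    \mathrm{err_{upper}} = f(m+\sigma) - \mathrm{MVA} , \qquad
///    \mathrm{err_{lower}} = \mathrm{MVA} - f(m-\sigma) ,
/// \f]
///
/// where \f$ f \f$ is the output activation function (fOutput).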
1584 
1585 
1586 #ifdef MethodMLP_UseMinuit__
1587 
1588 ////////////////////////////////////////////////////////////////////////////////
1589 /// minimize using Minuit
1590 
1591 void TMVA::MethodMLP::MinuitMinimize()
1592 {
1593  fNumberOfWeights = fSynapses->GetEntriesFast();
1594 
1595  TFitter* tfitter = new TFitter( fNumberOfWeights );
1596 
1597  // minuit-specific settings
1598  Double_t args[10];
1599 
1600  // output level
1601  args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1602  tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1603  tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1604 
1605  double w[54];
1606 
1607  // init parameters
1608  for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1609  TString parName = Form("w%i", ipar);
1610  tfitter->SetParameter( ipar,
1611  parName, w[ipar], 0.1, 0, 0 );
1612  }
1613 
1614  // define the CFN function
1615  tfitter->SetFCN( &IFCN );
1616 
1617  // define fit strategy
1618  args[0] = 2;
1619  tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1620 
1621  // now do the fit !
1622  args[0] = 1e-04;
1623  tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1624 
1625  Bool_t doBetter = kFALSE;
1626  Bool_t doEvenBetter = kFALSE;
1627  if (doBetter) {
1628  args[0] = 1e-04;
1629  tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1630 
1631  if (doEvenBetter) {
1632  args[0] = 500;
1633  tfitter->ExecuteCommand( "MINOS", args, 1 );
1634  }
1635  }
1636 }
1637 
////////////////////////////////////////////////////////////////////////////////
1639 void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1640 {
1641  // Evaluate the minimisation function ----------------------------------------------------
1642  //
1643  // Input parameters:
1644  // npars: number of currently variable parameters
1645  // CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1646  // fitPars: array of (constant and variable) parameters
1647  // iflag: indicates what is to be calculated (see example below)
1648  // grad: array of gradients
1649  //
1650  // Output parameters:
1651  // f: the calculated function value.
1652  // grad: the (optional) vector of first derivatives).
1653  // ---------------------------------------------------------------------------------------
1654  ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1655 }
1656 
1657 TTHREAD_TLS(Int_t) nc = 0;
1658 TTHREAD_TLS(double) minf = 1000000;
1659 
1660 void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1661 {
1662  // first update the weights
1663  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1664  TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1665  synapse->SetWeight(fitPars[ipar]);
1666  }
1667 
1668  // now compute the estimator
1669  f = CalculateEstimator();
1670 
1671  nc++;
1672  if (f < minf) minf = f;
1673  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1674  Log() << kDEBUG << Endl;
1675  Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1676 }
1677 
1678 ////////////////////////////////////////////////////////////////////////////////
1679 /// global "this" pointer to be used in minuit
1680 
1681 TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1682 {
1683  return fgThis;
1684 }
1685 
1686 #endif
1687 
1688 
1689 ////////////////////////////////////////////////////////////////////////////////
1690 /// write specific classifier response
1691 
1692 void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1693 {
1694  MethodANNBase::MakeClassSpecific(fout, className);
1695 }
1696 
1697 ////////////////////////////////////////////////////////////////////////////////
1698 /// get help message text
1699 ///
1700 /// typical length of text line:
1701 /// "|--------------------------------------------------------------|"
1702 
1703 void TMVA::MethodMLP::GetHelpMessage() const
1704 {
1705  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1706  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1707 
1708  Log() << Endl;
1709  Log() << col << "--- Short description:" << colres << Endl;
1710  Log() << Endl;
1711  Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1712  Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1713  Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1714  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1715  Log() << "signal and one background). " << Endl;
1716  Log() << Endl;
1717  Log() << col << "--- Performance optimisation:" << colres << Endl;
1718  Log() << Endl;
1719  Log() << "Neural networks are stable and performing for a large variety of " << Endl;
1720  Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1721  Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1722  Log() << "number of input variables that have only little discrimination power. " << Endl;
1723  Log() << "" << Endl;
1724  Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1725  Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1726  Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1727  Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1728  Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1729  Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1730  Log() << "competitive results)." << Endl;
1731  Log() << Endl;
1732  Log() << col << "Overtraining: " << colres
1733  << "only the TMlpANN performs an explicit separation of the" << Endl;
1734  Log() << "full training sample into independent training and validation samples." << Endl;
1735  Log() << "We have found that in most high-energy physics applications the " << Endl;
1736  Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1737  Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1738  Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1739  Log() << "the use of validation samples would only reduce the available training" << Endl;
1740  Log() << "information. However, if the performance on the training sample is " << Endl;
1741  Log() << "found to be significantly better than the one found with the inde-" << Endl;
1742  Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1743  Log() << "are printed to standard output at the end of each training job." << Endl;
1744  Log() << Endl;
1745  Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1746  Log() << Endl;
1747  Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1748  Log() << "\"HiddenLayers=N+1,N,...\", where the first hidden layer has N+1" << Endl;
1749  Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1750  Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1751  Log() << "in favour of more neurons in the first hidden layer." << Endl;
1752  Log() << "" << Endl;
1753  Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1754  Log() << "adjustable weights is small compared to the training sample size," << Endl;
1755  Log() << "using a large number of training samples should not lead to overtraining." << Endl;
1756 }
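// --------------------------------------------------------------------------------------------
// Illustrative sketch, not part of this file: booking the MLP with the options discussed in
// the help text above. The factory and input data setup are assumed to exist elsewhere, and
// the concrete option values (architecture "N+5,N", NCycles=600, TrainingMethod=BP) are
// hypothetical examples consistent with the advice printed above, not defaults of this class.
// Depending on the ROOT/TMVA version, BookMethod is called either directly on the factory or
// with a TMVA::DataLoader as its first argument.
//
//   factory->BookMethod( TMVA::Types::kMLP, "MLP",
//                        "HiddenLayers=N+5,N:NCycles=600:TrainingMethod=BP" );
// --------------------------------------------------------------------------------------------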
1757 