Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
MethodMLP.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodMLP *
8 * *
9 * *
10 * Description: *
11 * ANN Multilayer Perceptron class for the discrimination of signal *
12 * from background. BFGS implementation based on TMultiLayerPerceptron *
13 * class from ROOT (http://root.cern.ch). *
14 * *
15 * Authors (alphabetical): *
16 * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18 * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19 * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20 * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21 * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22 * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25 * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26 * *
27 * Copyright (c) 2005-2011: *
28 * CERN, Switzerland *
29 * U. of Victoria, Canada *
30 * MPI-K Heidelberg, Germany *
31 * U. of Bonn, Germany *
32 * *
33 * Redistribution and use in source and binary forms, with or without *
34 * modification, are permitted according to the terms listed in LICENSE *
35 * (see tmva/doc/LICENSE) *
36 **********************************************************************************/
37
38/*! \class TMVA::MethodMLP
39\ingroup TMVA
40
41Multilayer Perceptron class built off of MethodANNBase
42
43*/
44
45#include "TMVA/MethodMLP.h"
46
47#include "TMVA/Config.h"
48#include "TMVA/Configurable.h"
51#include "TMVA/DataSet.h"
52#include "TMVA/DataSetInfo.h"
53#include "TMVA/FitterBase.h"
54#include "TMVA/GeneticFitter.h"
55#include "TMVA/IFitterTarget.h"
56#include "TMVA/IMethod.h"
57#include "TMVA/Interval.h"
58#include "TMVA/MethodANNBase.h"
59#include "TMVA/MsgLogger.h"
60#include "TMVA/TNeuron.h"
61#include "TMVA/TSynapse.h"
62#include "TMVA/Timer.h"
63#include "TMVA/Tools.h"
64#include "TMVA/Types.h"
65
66#include "TH1.h"
67#include "TString.h"
68#include "TFitter.h"
69#include "TMatrixD.h"
70#include "TMath.h"
71
72#include <iostream>
73#include <cmath>
74#include <vector>
75
76#ifdef MethodMLP_UseMinuit__
77TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
78Bool_t MethodMLP_UseMinuit = kTRUE;
79#endif
80
82
83
84 using std::vector;
85
86////////////////////////////////////////////////////////////////////////////////
87/// standard constructor
88
90 const TString& methodTitle,
91 DataSetInfo& theData,
92 const TString& theOption)
93 : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
94 fUseRegulator(false), fCalculateErrors(false),
95 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
99 fLastAlpha(0.0), fTau(0.),
100 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
101 fBPMode(kSequential), fBpModeS("None"),
102 fBatchSize(0), fTestRate(0), fEpochMon(false),
104 fGA_SC_rate(0), fGA_SC_factor(0.0),
106 fWeightRange (1.0)
107{
108
109}
110
111////////////////////////////////////////////////////////////////////////////////
112/// constructor from a weight file
113
115 const TString& theWeightFile)
116 : MethodANNBase( Types::kMLP, theData, theWeightFile),
117 fUseRegulator(false), fCalculateErrors(false),
118 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
121 fSamplingTraining(false), fSamplingTesting(false),
122 fLastAlpha(0.0), fTau(0.),
123 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
124 fBPMode(kSequential), fBpModeS("None"),
125 fBatchSize(0), fTestRate(0), fEpochMon(false),
127 fGA_SC_rate(0), fGA_SC_factor(0.0),
129 fWeightRange (1.0)
130{
131}
132
133////////////////////////////////////////////////////////////////////////////////
134/// destructor
135/// nothing to be done
136
140
142{
143 Train(NumCycles());
144}
145
146
147
148////////////////////////////////////////////////////////////////////////////////
149/// MLP can handle classification with 2 classes and regression with one regression-target
150
152{
153 if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
154 if (type == Types::kMulticlass ) return kTRUE;
155 if (type == Types::kRegression ) return kTRUE;
156
157 return kFALSE;
158}
159
160////////////////////////////////////////////////////////////////////////////////
161/// default initializations
162
164{
165 // the minimum requirement to declare an event signal-like
167#ifdef MethodMLP_UseMinuit__
168 fgThis = this;
169#endif
170}
171
172////////////////////////////////////////////////////////////////////////////////
173/// define the options (their key words) that can be set in the option string
174///
175/// know options:
176///
177/// - TrainingMethod `<string>` Training method
178/// available values are:
179/// - BP Back-Propagation `<default>`
180/// - GA Genetic Algorithm (takes a LONG time)
181///
182/// - LearningRate `<float>` NN learning rate parameter
183/// - DecayRate `<float>` Decay rate for learning parameter
184/// - TestRate `<int>` Test for overtraining performed at each #%th epochs
185///
186/// - BPMode `<string>` Back-propagation learning mode
187/// available values are:
188/// - sequential `<default>`
189/// - batch
190///
191/// - BatchSize `<int>` Batch size: number of events/batch, only set if in Batch Mode,
192/// - -1 for BatchSize=number_of_events
193
195{
196 DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
197 "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
198 AddPreDefVal(TString("BP"));
199 AddPreDefVal(TString("GA"));
200 AddPreDefVal(TString("BFGS"));
201
202 DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
203 DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
204 DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
205 DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
206
207 DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
208 DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
209 DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance"," The sampling weights of events in epochs which successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
210
211 DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
212 DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
213
214 DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
215 DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
216
217 DeclareOptionRef(fBpModeS="sequential", "BPMode",
218 "Back-propagation learning mode: sequential or batch");
219 AddPreDefVal(TString("sequential"));
220 AddPreDefVal(TString("batch"));
221
222 DeclareOptionRef(fBatchSize=-1, "BatchSize",
223 "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
224
225 DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
226 "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
227
228 DeclareOptionRef(fSteps=-1, "ConvergenceTests",
229 "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
230
231 DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
232 "Use regulator to avoid over-training"); //zjh
233 DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
234 "Maximum times of regulator update"); //zjh
235 DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
236 "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
237
238 DeclareOptionRef(fWeightRange=1.0, "WeightRange",
239 "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
240
241}
242
243////////////////////////////////////////////////////////////////////////////////
244/// process user options
245
247{
249
250
252 Log() << kINFO
253 << "Will ignore negative events in training!"
254 << Endl;
255 }
256
257
258 if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
259 else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
260 else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
261
262 if (fBpModeS == "sequential") fBPMode = kSequential;
263 else if (fBpModeS == "batch") fBPMode = kBatch;
264
265 // InitializeLearningRates();
266
267 if (fBPMode == kBatch) {
268 Data()->SetCurrentType(Types::kTraining);
269 Int_t numEvents = Data()->GetNEvents();
270 if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
271 }
272}
273
274////////////////////////////////////////////////////////////////////////////////
275/// initialize learning rates of synapses, used only by back propagation
276
278{
279 Log() << kDEBUG << "Initialize learning rates" << Endl;
280 TSynapse *synapse;
281 Int_t numSynapses = fSynapses->GetEntriesFast();
282 for (Int_t i = 0; i < numSynapses; i++) {
283 synapse = (TSynapse*)fSynapses->At(i);
284 synapse->SetLearningRate(fLearnRate);
285 }
286}
287
288////////////////////////////////////////////////////////////////////////////////
289/// calculate the estimator that training is attempting to minimize
290
292{
293 // sanity check
294 if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
295 Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
296 }
297
298 Types::ETreeType saveType = Data()->GetCurrentType();
299 Data()->SetCurrentType(treeType);
300
301 // if epochs are counted create monitoring histograms (only available for classification)
302 TString type = (treeType == Types::kTraining ? "train" : "test");
303 TString name = TString::Format("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
304 TString nameB = name + "_B";
305 TString nameS = name + "_S";
306 Int_t nbin = 100;
307 Float_t limit = 2;
308 TH1* histS = 0;
309 TH1* histB = 0;
310 if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
311 histS = new TH1F( nameS, nameS, nbin, -limit, limit );
312 histB = new TH1F( nameB, nameB, nbin, -limit, limit );
313 }
314
315 Double_t estimator = 0;
316
317 // loop over all training events
318 Int_t nEvents = GetNEvents();
319 UInt_t nClasses = DataInfo().GetNClasses();
320 UInt_t nTgts = DataInfo().GetNTargets();
321
322
323 Float_t sumOfWeights = 0.f;
324 if( fWeightRange < 1.f ){
325 fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
326 }
327
328 for (Int_t i = 0; i < nEvents; i++) {
329
330 const Event* ev = GetEvent(i);
331
333 && (saveType == Types::kTraining)){
334 continue;
335 }
336
337 Double_t w = ev->GetWeight();
338
339 ForceNetworkInputs( ev );
341
342 Double_t d = 0, v = 0;
343 if (DoRegression()) {
344 for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
345 v = GetOutputNeuron( itgt )->GetActivationValue();
346 Double_t targetValue = ev->GetTarget( itgt );
347 Double_t dt = v - targetValue;
348 d += (dt*dt);
349 }
350 estimator += d*w;
351 } else if (DoMulticlass() ) {
352 UInt_t cls = ev->GetClass();
353 if (fEstimator==kCE){
354 Double_t norm(0);
355 for (UInt_t icls = 0; icls < nClasses; icls++) {
356 Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
357 norm += exp( activationValue );
358 if(icls==cls)
359 d = exp( activationValue );
360 }
361 d = -TMath::Log(d/norm);
362 }
363 else{
364 for (UInt_t icls = 0; icls < nClasses; icls++) {
365 Double_t desired = (icls==cls) ? 1.0 : 0.0;
366 v = GetOutputNeuron( icls )->GetActivationValue();
367 d = (desired-v)*(desired-v);
368 }
369 }
370 estimator += d*w; //zjh
371 } else {
372 Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
373 v = GetOutputNeuron()->GetActivationValue();
374 if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
375 else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
376 estimator += d*w; //zjh
377 }
378
380 fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
381
382 sumOfWeights += w;
383
384
385 // fill monitoring histograms
386 if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
387 else if (histB != 0) histB->Fill( float(v), float(w) );
388 }
389
390
392 std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
393
394 Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
395 estimator = 0.f;
396
397 Float_t weightRangeCut = fWeightRange*sumOfWeights;
398 Float_t weightSum = 0.f;
399 for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
400 float deviation = (*itDev).first;
401 float devWeight = (*itDev).second;
402 weightSum += devWeight; // add the weight of this event
403 if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
404 estimator += devWeight*deviation;
405 }
406 }
407
408 sumOfWeights = sumOfWeightsInRange;
410 }
411
412 if (histS != 0) fEpochMonHistS.push_back( histS );
413 if (histB != 0) fEpochMonHistB.push_back( histB );
414
415 //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
416 //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
417 //else estimator = estimator*0.5/Float_t(nEvents);
418 estimator = estimator/Float_t(sumOfWeights);
419
420
421 //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
422
423 Data()->SetCurrentType( saveType );
424
425 // provide epoch-wise monitoring
426 if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
427 CreateWeightMonitoringHists( TString::Format("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
428 }
429
430 return estimator;
431}
432
433////////////////////////////////////////////////////////////////////////////////
434
436{
437 if (fNetwork == 0) {
438 //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
439 Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
441 }
442 Log() << kDEBUG << "reinitialize learning rates" << Endl;
444 Log() << kHEADER;
445 PrintMessage("Training Network");
446 Log() << Endl;
447 Int_t nEvents=GetNEvents();
448 Int_t nSynapses=fSynapses->GetEntriesFast();
449 if (nSynapses>nEvents)
450 Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
451
452 fIPyMaxIter = nEpochs;
453 if (fInteractive && fInteractive->NotInitialized()){
454 std::vector<TString> titles = {"Error on training set", "Error on test set"};
455 fInteractive->Init(titles);
456 }
457
458#ifdef MethodMLP_UseMinuit__
459 if (useMinuit) MinuitMinimize();
460#else
462 else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
463 else BackPropagationMinimize(nEpochs);
464#endif
465
466 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
467 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
468 if (fUseRegulator){
469 Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
471 Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
472 }
473
475 {
476 Int_t numSynapses=fSynapses->GetEntriesFast();
477 fInvHessian.ResizeTo(numSynapses,numSynapses);
479 }
481}
482
483////////////////////////////////////////////////////////////////////////////////
484/// train network with BFGS algorithm
485
487{
488 Timer timer( (fSteps>0?100:nEpochs), GetName() );
489
490 // create histograms for overtraining monitoring
491 Int_t nbinTest = Int_t(nEpochs/fTestRate);
492 if(!IsSilentFile())
493 {
494 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
495 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
496 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
497 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
498 }
499
500 Int_t nSynapses = fSynapses->GetEntriesFast();
501 Int_t nWeights = nSynapses;
502
503 for (Int_t i=0;i<nSynapses;i++) {
504 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
505 synapse->SetDEDw(0.0);
506 }
507
508 std::vector<Double_t> buffer( nWeights );
509 for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
510
511 TMatrixD Dir ( nWeights, 1 );
512 TMatrixD Hessian ( nWeights, nWeights );
513 TMatrixD Gamma ( nWeights, 1 );
514 TMatrixD Delta ( nWeights, 1 );
515 Int_t RegUpdateCD=0; //zjh
516 Int_t RegUpdateTimes=0; //zjh
517 Double_t AccuError=0;
518
519 Double_t trainE = -1;
520 Double_t testE = -1;
521
522 fLastAlpha = 0.;
523
525 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
526
527 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
528 timer.DrawProgressBar( 0 );
529
530 // start training cycles (epochs)
531 for (Int_t i = 0; i < nEpochs; i++) {
532
533 if (fExitFromTraining) break;
534 fIPyCurrentIter = i;
535 if (Float_t(i)/nEpochs < fSamplingEpoch) {
536 if ((i+1)%fTestRate == 0 || (i == 0)) {
537 if (fSamplingTraining) {
538 Data()->SetCurrentType( Types::kTraining );
539 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
540 Data()->CreateSampling();
541 }
542 if (fSamplingTesting) {
543 Data()->SetCurrentType( Types::kTesting );
544 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
545 Data()->CreateSampling();
546 }
547 }
548 }
549 else {
550 Data()->SetCurrentType( Types::kTraining );
551 Data()->InitSampling(1.0,1.0);
552 Data()->SetCurrentType( Types::kTesting );
553 Data()->InitSampling(1.0,1.0);
554 }
555 Data()->SetCurrentType( Types::kTraining );
556
557 //zjh
558 if (fUseRegulator) {
559 UpdatePriors();
560 RegUpdateCD++;
561 }
562 //zjh
563
564 SetGammaDelta( Gamma, Delta, buffer );
565
566 if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
567 SteepestDir( Dir );
568 Hessian.UnitMatrix();
569 RegUpdateCD=0; //zjh
570 }
571 else {
572 if (GetHessian( Hessian, Gamma, Delta )) {
573 SteepestDir( Dir );
574 Hessian.UnitMatrix();
575 RegUpdateCD=0; //zjh
576 }
577 else SetDir( Hessian, Dir );
578 }
579
580 Double_t dError=0; //zjh
581 if (DerivDir( Dir ) > 0) {
582 SteepestDir( Dir );
583 Hessian.UnitMatrix();
584 RegUpdateCD=0; //zjh
585 }
586 if (LineSearch( Dir, buffer, &dError )) { //zjh
587 Hessian.UnitMatrix();
588 SteepestDir( Dir );
589 RegUpdateCD=0; //zjh
590 if (LineSearch(Dir, buffer, &dError)) { //zjh
591 i = nEpochs;
592 Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
593 }
594 }
595
596 //zjh+
597 if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
598 AccuError+=dError;
599
600 if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
601 Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
603 Hessian.UnitMatrix();
604 RegUpdateCD=0;
605 RegUpdateTimes++;
606 AccuError=0;
607 }
608 //zjh-
609
610 // monitor convergence of training and control sample
611 if ((i+1)%fTestRate == 0) {
612 //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
613 //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
614 trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
615 testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
616 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
617 if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
618 {
619 fEstimatorHistTrain->Fill( i+1, trainE );
620 fEstimatorHistTest ->Fill( i+1, testE );
621 }
622 Bool_t success = kFALSE;
623 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
624 success = kTRUE;
625 }
626 Data()->EventResult( success );
627
628 SetCurrentValue( testE );
629 if (HasConverged()) {
630 if (Float_t(i)/nEpochs < fSamplingEpoch) {
631 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
632 i = newEpoch;
634 }
635 else break;
636 }
637 }
638
639 // draw progress
640 TString convText = TString::Format( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
641 if (fSteps > 0) {
642 Float_t progress = 0;
643 if (Float_t(i)/nEpochs < fSamplingEpoch)
644 // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
646 else
647 {
648 // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
649 progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
650 }
651 Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
652 if (progress2>progress) progress=progress2; //zjh
653 timer.DrawProgressBar( Int_t(progress), convText );
654 }
655 else {
656 Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
657 if (progress<i) progress=i; //zjh
658 timer.DrawProgressBar( progress, convText ); //zjh
659 }
660
661 // some verbose output
662 if (fgPRINT_SEQ) {
663 PrintNetwork();
665 }
666 }
667}
668
669////////////////////////////////////////////////////////////////////////////////
670
671void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
672{
673 Int_t nWeights = fSynapses->GetEntriesFast();
674
675 Int_t IDX = 0;
676 Int_t nSynapses = fSynapses->GetEntriesFast();
677 for (Int_t i=0;i<nSynapses;i++) {
678 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
679 Gamma[IDX++][0] = -synapse->GetDEDw();
680 }
681
682 for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
683
684 ComputeDEDw();
685
686 IDX = 0;
687 for (Int_t i=0;i<nSynapses;i++)
688 {
689 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
690 Gamma[IDX++][0] += synapse->GetDEDw();
691 }
692}
693
694////////////////////////////////////////////////////////////////////////////////
695
697{
698 Int_t nSynapses = fSynapses->GetEntriesFast();
699 for (Int_t i=0;i<nSynapses;i++) {
700 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
701 synapse->SetDEDw( 0.0 );
702 }
703
704 Int_t nEvents = GetNEvents();
705 Int_t nPosEvents = nEvents;
706 for (Int_t i=0;i<nEvents;i++) {
707
708 const Event* ev = GetEvent(i);
710 && (Data()->GetCurrentType() == Types::kTraining)){
711 --nPosEvents;
712 continue;
713 }
714
715 SimulateEvent( ev );
716
717 for (Int_t j=0;j<nSynapses;j++) {
718 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
719 synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
720 }
721 }
722
723 for (Int_t i=0;i<nSynapses;i++) {
724 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
725 Double_t DEDw=synapse->GetDEDw(); //zjh
726 if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
727 synapse->SetDEDw( DEDw / nPosEvents ); //zjh
728 }
729}
730
731////////////////////////////////////////////////////////////////////////////////
732
734{
735 Double_t eventWeight = ev->GetWeight();
736
737 ForceNetworkInputs( ev );
739
740 if (DoRegression()) {
741 UInt_t ntgt = DataInfo().GetNTargets();
742 for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
743 Double_t desired = ev->GetTarget(itgt);
744 Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
745 GetOutputNeuron( itgt )->SetError(error);
746 }
747 } else if (DoMulticlass()) {
748 UInt_t nClasses = DataInfo().GetNClasses();
749 UInt_t cls = ev->GetClass();
750 for (UInt_t icls = 0; icls < nClasses; icls++) {
751 Double_t desired = ( cls==icls ? 1.0 : 0.0 );
752 Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
753 GetOutputNeuron( icls )->SetError(error);
754 }
755 } else {
756 Double_t desired = GetDesiredOutput( ev );
757 Double_t error=-1; //zjh
758 if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
759 else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
760 GetOutputNeuron()->SetError(error);
761 }
762
764 for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
765 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
766 synapse->InitDelta();
767 synapse->CalculateDelta();
768 }
769}
770
771////////////////////////////////////////////////////////////////////////////////
772
774{
775 Int_t IDX = 0;
776 Int_t nSynapses = fSynapses->GetEntriesFast();
777
778 for (Int_t i=0;i<nSynapses;i++) {
779 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
780 Dir[IDX++][0] = -synapse->GetDEDw();
781 }
782}
783
784////////////////////////////////////////////////////////////////////////////////
785
787{
788 TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
789 if ((Double_t) gd[0][0] == 0.) return kTRUE;
790 TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
791 TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
792 TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
793 Double_t a = 1 / (Double_t) gd[0][0];
794 Double_t f = 1 + ((Double_t)gHg[0][0]*a);
796 res *= f;
797 res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
799 res *= a;
800 Hessian += res;
801
802 return kFALSE;
803}
804
805////////////////////////////////////////////////////////////////////////////////
806
808{
809 Int_t IDX = 0;
810 Int_t nSynapses = fSynapses->GetEntriesFast();
811 TMatrixD DEDw(nSynapses, 1);
812
813 for (Int_t i=0;i<nSynapses;i++) {
814 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
815 DEDw[IDX++][0] = synapse->GetDEDw();
816 }
817
818 dir = Hessian * DEDw;
819 for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
820}
821
822////////////////////////////////////////////////////////////////////////////////
823
825{
826 Int_t IDX = 0;
827 Int_t nSynapses = fSynapses->GetEntriesFast();
828 Double_t Result = 0.0;
829
830 for (Int_t i=0;i<nSynapses;i++) {
831 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
832 Result += Dir[IDX++][0] * synapse->GetDEDw();
833 }
834 return Result;
835}
836
837////////////////////////////////////////////////////////////////////////////////
838
839Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
840{
841 Int_t IDX = 0;
842 Int_t nSynapses = fSynapses->GetEntriesFast();
843 Int_t nWeights = nSynapses;
844
845 std::vector<Double_t> Origin(nWeights);
846 for (Int_t i=0;i<nSynapses;i++) {
847 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
848 Origin[i] = synapse->GetWeight();
849 }
850
851 Double_t err1 = GetError();
852 Double_t errOrigin=err1;//zjh
853 Double_t alpha1 = 0.;
854 Double_t alpha2 = fLastAlpha;
855
856
857 if (alpha2 < 0.01) alpha2 = 0.01;
858 else if (alpha2 > 2.0) alpha2 = 2.0;
859 Double_t alpha_original = alpha2;
860 Double_t alpha3 = alpha2;
861
862 SetDirWeights( Origin, Dir, alpha2 );
863 Double_t err2 = GetError();
864 //Double_t err2 = err1;
865 Double_t err3 = err2;
866 Bool_t bingo = kFALSE;
867
868
869 if (err1 > err2) {
870 for (Int_t i=0;i<100;i++) {
871 alpha3 *= fTau;
872 SetDirWeights(Origin, Dir, alpha3);
873 err3 = GetError();
874 if (err3 > err2) {
875 bingo = kTRUE;
876 break;
877 }
878 alpha1 = alpha2;
879 err1 = err2;
880 alpha2 = alpha3;
881 err2 = err3;
882 }
883 if (!bingo) {
884 SetDirWeights(Origin, Dir, 0.);
885 return kTRUE;
886 }
887 }
888 else {
889 for (Int_t i=0;i<100;i++) {
890 alpha2 /= fTau;
891 if (i==50) {
892 Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
893 alpha2 = -alpha_original;
894 }
895 SetDirWeights(Origin, Dir, alpha2);
896 err2 = GetError();
897 if (err1 > err2) {
898 bingo = kTRUE;
899 break;
900 }
901 alpha3 = alpha2;
902 err3 = err2;
903 }
904 if (!bingo) {
905 SetDirWeights(Origin, Dir, 0.);
906 Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
907 fLastAlpha = 0.05;
908 return kTRUE;
909 }
910 }
911
912 if (alpha1>0 && alpha2>0 && alpha3 > 0) {
913 fLastAlpha = 0.5 * (alpha1 + alpha3 -
914 (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
915 - ( err2 - err1 ) / (alpha2 - alpha1 )));
916 }
917 else {
918 fLastAlpha = alpha2;
919 }
920
921 fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
922
923 SetDirWeights(Origin, Dir, fLastAlpha);
924
925 // leaving these lines uncommented is a heavy price to pay for only a warning message
926 // (which shouldn't appear anyway)
927 // --> about 15% of time is spent in the final GetError().
928 //
929 Double_t finalError = GetError();
930 if (finalError > err1) {
931 Log() << kWARNING << "Line search increased error! Something is wrong."
932 << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
933 << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
934 }
935
936 for (Int_t i=0;i<nSynapses;i++) {
937 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
938 buffer[IDX] = synapse->GetWeight() - Origin[IDX];
939 IDX++;
940 }
941
942 if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
943
944 return kFALSE;
945}
946
947////////////////////////////////////////////////////////////////////////////////
948
949void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
950{
951 Int_t IDX = 0;
952 Int_t nSynapses = fSynapses->GetEntriesFast();
953
954 for (Int_t i=0;i<nSynapses;i++) {
955 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
956 synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
957 IDX++;
958 }
959 if (fUseRegulator) UpdatePriors();//zjh
960}
961
962
963////////////////////////////////////////////////////////////////////////////////
964
966{
967 Int_t nEvents = GetNEvents();
968 UInt_t ntgts = GetNTargets();
969 Double_t Result = 0.;
970
971 for (Int_t i=0;i<nEvents;i++) {
972 const Event* ev = GetEvent(i);
973
975 && (Data()->GetCurrentType() == Types::kTraining)){
976 continue;
977 }
978 SimulateEvent( ev );
979
980 Double_t error = 0.;
981 if (DoRegression()) {
982 for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
983 error += GetMSEErr( ev, itgt );//zjh
984 }
985 } else if ( DoMulticlass() ){
986 for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
987 error += GetMSEErr( ev, icls );
988 }
989 } else {
990 if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
991 else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
992 }
993 Result += error * ev->GetWeight();
994 }
995 if (fUseRegulator) Result+=fPrior; //zjh
996 if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
997 return Result;
998}
999
1000////////////////////////////////////////////////////////////////////////////////
1001
1003{
1004 Double_t error = 0;
1005 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1006 Double_t target = 0;
1007 if (DoRegression()) target = ev->GetTarget( index );
1008 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1009 else target = GetDesiredOutput( ev );
1010
1011 error = 0.5*(output-target)*(output-target); //zjh
1012
1013 return error;
1014
1015}
1016
1017 ////////////////////////////////////////////////////////////////////////////////
// NOTE(review): signature line (src 1019) lost in extraction; presumably
// `Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )`.
//
// Cross-entropy error -(t*ln(o) + (1-t)*ln(1-o)) of output neuron `index`.
// No clamping is applied: an output saturated at exactly 0 or 1 yields
// log(0) = -inf here - acceptable only if the activation cannot saturate.
1018
1020 {
1021 Double_t error = 0;
1022 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1023 Double_t target = 0;
1024 if (DoRegression()) target = ev->GetTarget( index );
1025 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1026 else target = GetDesiredOutput( ev );
1027
1028 error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1029
1030 return error;
1031}
1032
1033 ////////////////////////////////////////////////////////////////////////////////
1034 /// minimize estimator / train network with back propagation algorithm
// NOTE(review): several lines were lost in extraction - the signature
// (src 1036, presumably `void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)`),
// src 1051, src 1112 (inside the convergence-reset branch) and src 1128
// (the else-branch assignment to `progress`). Confirm against upstream ROOT.
//
// Runs nEpochs training epochs: optionally (re)samples the training/test
// sets, trains one epoch, decays the learning rate, and every fTestRate
// epochs monitors the train/test estimators and checks convergence.
1035
1037 {
1038 // Timer timer( nEpochs, GetName() );
1039 Timer timer( (fSteps>0?100:nEpochs), GetName() );
// after lateEpoch the learning-rate decay is made stronger (see DecaySynapseWeights)
1040 Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1041
1042 // create histograms for overtraining monitoring
1043 Int_t nbinTest = Int_t(nEpochs/fTestRate);
1044 if(!IsSilentFile())
1045 {
1046 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1047 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1048 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1049 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1050 }
1052 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1053
1054 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1055 timer.DrawProgressBar(0);
1056
1057 // estimators
1058 Double_t trainE = -1;
1059 Double_t testE = -1;
1060
1061 // start training cycles (epochs)
1062 for (Int_t i = 0; i < nEpochs; i++) {
1063
1064 if (fExitFromTraining) break;
1065 fIPyCurrentIter = i;
// during the sampling phase, refresh the event sampling every fTestRate epochs
1066 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1067 if ((i+1)%fTestRate == 0 || (i == 0)) {
1068 if (fSamplingTraining) {
1069 Data()->SetCurrentType( Types::kTraining );
1070 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1071 Data()->CreateSampling();
1072 }
1073 if (fSamplingTesting) {
1074 Data()->SetCurrentType( Types::kTesting );
1075 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1076 Data()->CreateSampling();
1077 }
1078 }
1079 }
1080 else {
// sampling phase over: fall back to the full samples
1081 Data()->SetCurrentType( Types::kTraining );
1082 Data()->InitSampling(1.0,1.0);
1083 Data()->SetCurrentType( Types::kTesting );
1084 Data()->InitSampling(1.0,1.0);
1085 }
1086 Data()->SetCurrentType( Types::kTraining );
1087
1088 TrainOneEpoch();
1089 DecaySynapseWeights(i >= lateEpoch);
1090
1091 // monitor convergence of training and control sample
1092 if ((i+1)%fTestRate == 0) {
1093 trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1094 testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1095 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1096 if(!IsSilentFile())
1097 {
1098 fEstimatorHistTrain->Fill( i+1, trainE );
1099 fEstimatorHistTest ->Fill( i+1, testE );
1100 }
1101 Bool_t success = kFALSE;
1102 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1103 success = kTRUE;
1104 }
1105 Data()->EventResult( success );
1106
1107 SetCurrentValue( testE );
1108 if (HasConverged()) {
// converged while still sampling: jump past the sampling phase instead of stopping
1109 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1110 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1111 i = newEpoch;
1113 }
1114 else {
// first convergence pulls lateEpoch forward; second one ends training
1115 if (lateEpoch > i) lateEpoch = i;
1116 else break;
1117 }
1118 }
1119 }
1120
1121 // draw progress bar (add convergence value)
1122 TString convText = TString::Format( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1123 if (fSteps > 0) {
1124 Float_t progress = 0;
1125 if (Float_t(i)/nEpochs < fSamplingEpoch)
1126 progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1127 else
1129
1130 timer.DrawProgressBar( Int_t(progress), convText );
1131 }
1132 else {
1133 timer.DrawProgressBar( i, convText );
1134 }
1135 }
1136}
1137
1138////////////////////////////////////////////////////////////////////////////////
1139/// train network over a single epoch/cycle of events
1140
1142{
1143 Int_t nEvents = Data()->GetNEvents();
1144
1145 // randomize the order events will be presented, important for sequential mode
1146 Int_t* index = new Int_t[nEvents];
1147 for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1148 Shuffle(index, nEvents);
1149
1150 // loop over all training events
1151 for (Int_t i = 0; i < nEvents; i++) {
1152
1153 const Event * ev = GetEvent(index[i]);
1155 && (Data()->GetCurrentType() == Types::kTraining)){
1156 continue;
1157 }
1158
1159 TrainOneEvent(index[i]);
1160
1161 // do adjustments if in batch mode
1162 if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1164 if (fgPRINT_BATCH) {
1165 PrintNetwork();
1167 }
1168 }
1169
1170 // debug in sequential mode
1171 if (fgPRINT_SEQ) {
1172 PrintNetwork();
1174 }
1175 }
1176
1177 delete[] index;
1178}
1179
1180 ////////////////////////////////////////////////////////////////////////////////
1181 /// Input:
1182 /// - index: the array to shuffle
1183 /// - n: the size of the array
1184 /// Output:
1185 /// - index: the shuffled indexes
1186 ///
1187 /// This method is used for sequential training
// NOTE(review): signature line (src 1189) lost in extraction; presumably
// `void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)`.
// This is not a textbook Fisher-Yates shuffle: j is always drawn from
// [0, n-2] independent of i, so the resulting permutation distribution is
// slightly non-uniform. Kept as-is because trained results depend on the
// exact RNG sequence.
1188
1190 {
1191 Int_t j, k;
1192 Int_t a = n - 1;
1193 for (Int_t i = 0; i < n; i++) {
1194 j = (Int_t) (frgen->Rndm() * a);
1195 if (j<n){ // address the 'worries' of coverity
1196 k = index[j];
1197 index[j] = index[i];
1198 index[i] = k;
1199 }
1200 }
1201}
1202
1203 ////////////////////////////////////////////////////////////////////////////////
1204 /// decay synapse weights
1205 /// in last 10 epochs, lower learning rate even more to find a good minimum
// NOTE(review): signature line (src 1207) lost in extraction; presumably
// `void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)`.
// Multiplies every synapse's learning rate by fDecayRate (or by
// sqrt(fDecayRate) once lateEpoch is set, i.e. a weaker per-epoch decay
// applied near the end of training).
1206
1208 {
1209 TSynapse* synapse;
1210 Int_t numSynapses = fSynapses->GetEntriesFast();
1211 for (Int_t i = 0; i < numSynapses; i++) {
1212 synapse = (TSynapse*)fSynapses->At(i);
1213 if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1214 else synapse->DecayLearningRate(fDecayRate);
1215 }
1216}
1217
1218 ////////////////////////////////////////////////////////////////////////////////
1219 /// fast per-event training
// NOTE(review): the signature (src 1221, presumably
// `void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)`)
// and src 1249 (presumably the ForceNetworkCalculations() forward pass -
// the class declares such a method) were lost in extraction; confirm
// against the upstream ROOT sources.
//
// Bypasses the Event interface: reads the input values directly from
// `branchVar`, forces them onto the input neurons, and performs one
// back-propagation update towards the desired signal/background output.
1220
1222 {
1223 GetEvent(ievt);
1224
1225 // as soon as we know how to get event weights, get that here
1226
1227 // note: the normalization of event weights will affect the choice
1228 // of learning rate, one will have to experiment to get the right value.
1229 // in general, if the "average" event weight is 1, the learning rate
1230 // should be good if set around 0.02 (a good value if all event weights are 1)
1231 Double_t eventWeight = 1.0;
1232
1233 // get the desired output of this event
1234 Double_t desired;
1235 if (type == 0) desired = fOutput->GetMin(); // background //zjh
1236 else desired = fOutput->GetMax(); // signal //zjh
1237
1238 // force the value for each input neuron
1239 Double_t x;
1240 TNeuron* neuron;
1241
1242 for (UInt_t j = 0; j < GetNvar(); j++) {
1243 x = branchVar[j];
1244 if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1245 neuron = GetInputNeuron(j);
1246 neuron->ForceValue(x);
1247 }
1248
1250 UpdateNetwork(desired, eventWeight);
1251}
1252
1253 ////////////////////////////////////////////////////////////////////////////////
1254 /// train network over a single event
1255 /// this uses the new event model
// NOTE(review): the signature (src 1257, presumably
// `void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)`) and src 1267
// (presumably ForceNetworkCalculations()) were lost in extraction.
// NOTE(review): the branch chain below looks suspicious - there is no
// `else` between the DoRegression() and DoMulticlass() branches, so for a
// regression event the final `else UpdateNetwork(GetDesiredOutput(ev),...)`
// also fires (DoMulticlass() being false). Verify against upstream before
// changing: it may be fixed there with `else if`.
1256
1258 {
1259 // note: the normalization of event weights will affect the choice
1260 // of learning rate, one will have to experiment to get the right value.
1261 // in general, if the "average" event weight is 1, the learning rate
1262 // should be good if set around 0.02 (a good value if all event weights are 1)
1263
1264 const Event * ev = GetEvent(ievt);
1265 Double_t eventWeight = ev->GetWeight();
1266 ForceNetworkInputs( ev );
1268 if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1269 if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1270 else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1271}
1272
1273 ////////////////////////////////////////////////////////////////////////////////
1274 /// get the desired output of this event
// NOTE(review): signature line (src 1276) lost in extraction; presumably
// `Double_t TMVA::MethodMLP::GetDesiredOutput(const Event* ev)`.
// Maps signal events to the activation function's maximum and background
// events to its minimum.
1275
1277 {
1278 return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1279}
1280
1281////////////////////////////////////////////////////////////////////////////////
1282/// update the network based on how closely
1283/// the output matched the desired output
1284
1286{
1287 Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1288 if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1289 else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1290 else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1291 error *= eventWeight;
1292 GetOutputNeuron()->SetError(error);
1295}
1296
1297////////////////////////////////////////////////////////////////////////////////
1298/// update the network based on how closely
1299/// the output matched the desired output
1300
1301void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1302{
1303 // Norm for softmax
1304 Double_t norm = 0.;
1305 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1306 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1307 norm += TMath::Exp(act);
1308 }
1309
1310 // Get output of network, and apply softmax
1311 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1312 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1313 Double_t output = TMath::Exp(act) / norm;
1314 Double_t error = output - desired.at(i);
1315 error *= eventWeight;
1316 GetOutputNeuron(i)->SetError(error);
1317 }
1318
1319 // Do backpropagation
1322}
1323
1324 ////////////////////////////////////////////////////////////////////////////////
1325 /// have each neuron calculate its delta by back propagation
// NOTE(review): signature line (src 1327) lost in extraction; presumably
// `void TMVA::MethodMLP::CalculateNeuronDeltas()`.
// Walks the layers from output to input so that each neuron's delta is
// computed after the deltas of the layer it feeds into.
1326
1328 {
1329 TNeuron* neuron;
1330 Int_t numNeurons;
1331 Int_t numLayers = fNetwork->GetEntriesFast();
1332 TObjArray* curLayer;
1333
1334 // step backwards through the network (back propagation)
1335 // deltas calculated starting at output layer
1336 for (Int_t i = numLayers-1; i >= 0; i--) {
1337 curLayer = (TObjArray*)fNetwork->At(i);
1338 numNeurons = curLayer->GetEntriesFast();
1339
1340 for (Int_t j = 0; j < numNeurons; j++) {
1341 neuron = (TNeuron*) curLayer->At(j);
1342 neuron->CalculateDelta();
1343 }
1344 }
1345}
1346
1347 ////////////////////////////////////////////////////////////////////////////////
1348 /// create genetics class similar to GeneticCut
1349 /// give it vector of parameter ranges (parameters = weights)
1350 /// link fitness function of this class to ComputeEstimator
1351 /// instantiate GA (see MethodCuts)
1352 /// run it
1353 /// then this should exist for GA, Minuit and random sampling
// NOTE(review): signature line (src 1355) lost in extraction; presumably
// `void TMVA::MethodMLP::GeneticMinimize()`.
// NOTE(review): `gf` and the heap-allocated Interval objects in `ranges`
// are never deleted (leak). Also, the range loop runs over the number of
// synapses but indexes GetXmin/GetXmax by `ivar` as if these were input
// variables - verify that this is intended before relying on the ranges.
1354
1356 {
1357 PrintMessage("Minimizing Estimator with GA");
1358
1359 // define GA parameters
1360 fGA_preCalc = 1;
1361 fGA_SC_steps = 10;
1362 fGA_SC_rate = 5;
1363 fGA_SC_factor = 0.95;
1364 fGA_nsteps = 30;
1365
1366 // ranges
1367 std::vector<Interval*> ranges;
1368
1369 Int_t numWeights = fSynapses->GetEntriesFast();
1370 for (Int_t ivar=0; ivar< numWeights; ivar++) {
1371 ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1372 }
1373
1374 FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1375 gf->Run();
1376
1377 Double_t estimator = CalculateEstimator();
1378 Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1379}
1380
1381////////////////////////////////////////////////////////////////////////////////
1382/// interface to the estimate
1383
1384Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1385{
1386 return ComputeEstimator( parameters );
1387}
1388
1389////////////////////////////////////////////////////////////////////////////////
1390/// this function is called by GeneticANN for GA optimization
1391
1392Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1393{
1394 TSynapse* synapse;
1395 Int_t numSynapses = fSynapses->GetEntriesFast();
1396
1397 for (Int_t i = 0; i < numSynapses; i++) {
1398 synapse = (TSynapse*)fSynapses->At(i);
1399 synapse->SetWeight(parameters.at(i));
1400 }
1401 if (fUseRegulator) UpdatePriors(); //zjh
1402
1403 Double_t estimator = CalculateEstimator();
1404
1405 return estimator;
1406}
1407
1408 ////////////////////////////////////////////////////////////////////////////////
1409 /// update synapse error fields and adjust the weights (if in sequential mode)
// NOTE(review): signature line (src 1411) lost in extraction; presumably
// `void TMVA::MethodMLP::UpdateSynapses()`.
// Forward pass over all layers: in batch mode the corrections are only
// accumulated (applied later by AdjustSynapseWeights), in sequential mode
// each neuron updates its synapse weights immediately.
1410
1412 {
1413 TNeuron* neuron;
1414 Int_t numNeurons;
1415 TObjArray* curLayer;
1416 Int_t numLayers = fNetwork->GetEntriesFast();
1417
1418 for (Int_t i = 0; i < numLayers; i++) {
1419 curLayer = (TObjArray*)fNetwork->At(i);
1420 numNeurons = curLayer->GetEntriesFast();
1421
1422 for (Int_t j = 0; j < numNeurons; j++) {
1423 neuron = (TNeuron*) curLayer->At(j);
1424 if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1425 else neuron->UpdateSynapsesSequential();
1426 }
1427 }
1428}
1429
1430 ////////////////////////////////////////////////////////////////////////////////
1431 /// just adjust the synapse weights (should be called in batch mode)
// NOTE(review): signature line (src 1433) lost in extraction; presumably
// `void TMVA::MethodMLP::AdjustSynapseWeights()`.
// Applies the corrections accumulated by UpdateSynapsesBatch, walking the
// layers from output to input.
1432
1434 {
1435 TNeuron* neuron;
1436 Int_t numNeurons;
1437 TObjArray* curLayer;
1438 Int_t numLayers = fNetwork->GetEntriesFast();
1439
1440 for (Int_t i = numLayers-1; i >= 0; i--) {
1441 curLayer = (TObjArray*)fNetwork->At(i);
1442 numNeurons = curLayer->GetEntriesFast();
1443
1444 for (Int_t j = 0; j < numNeurons; j++) {
1445 neuron = (TNeuron*) curLayer->At(j);
1446 neuron->AdjustSynapseWeights();
1447 }
1448 }
1449}
1450
1451 ////////////////////////////////////////////////////////////////////////////////
// NOTE(review): signature line (src 1453) lost in extraction; presumably
// `void TMVA::MethodMLP::UpdatePriors()`.
// Recomputes the Gaussian weight-decay prior used by the regulator:
// fPrior = sum_i 0.5 * alpha_i * w_i^2 (alpha taken from fRegulators via
// the per-synapse index fRegulatorIdx), and fPriorDev its derivatives
// alpha_i * w_i, one entry per synapse.
1452
1454 {
1455 fPrior=0;
1456 fPriorDev.clear();
1457 Int_t nSynapses = fSynapses->GetEntriesFast();
1458 for (Int_t i=0;i<nSynapses;i++) {
1459 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1460 fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1461 fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1462 }
1463}
1464
1465 ////////////////////////////////////////////////////////////////////////////////
// NOTE(review): signature line (src 1467) lost in extraction; presumably
// `void TMVA::MethodMLP::UpdateRegulators()`.
// Re-estimates the regulator strengths fRegulators[] from the approximate
// inverse Hessian: per regulator class it accumulates the number of
// weights, the trace of InvH, and the summed squared weights, estimates
// the noise variance from the training estimator (kMSE only), and updates
// each regulator as variance*nWDP/(weightSum + variance*trace), clamped
// at zero. Logs the resulting train/test estimators at debug level.
1466
1468 {
1469 TMatrixD InvH(0,0);
1470 GetApproxInvHessian(InvH);
1471 Int_t numSynapses=fSynapses->GetEntriesFast();
1472 Int_t numRegulators=fRegulators.size();
1473 Float_t gamma=0,
1474 variance=1.; // Gaussian noise
1475 std::vector<Int_t> nWDP(numRegulators);
1476 std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1477 for (int i=0;i<numSynapses;i++) {
1478 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1479 Int_t idx=fRegulatorIdx[i];
1480 nWDP[idx]++;
1481 trace[idx]+=InvH[i][i];
1482 gamma+=1-fRegulators[idx]*InvH[i][i];
1483 weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1484 }
1485 if (fEstimator==kMSE) {
1486 if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1487 else variance=CalculateEstimator( Types::kTraining, 0 );
1488 }
1489
1490 //Log() << kDEBUG << Endl;
1491 for (int i=0;i<numRegulators;i++)
1492 {
1493 //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1494 fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1495 if (fRegulators[i]<0) fRegulators[i]=0;
1496 Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1497 }
1498 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1499 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1500
1501 Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1502
1503}
1504
1505 ////////////////////////////////////////////////////////////////////////////////
// Builds an outer-product (Gauss-Newton style) approximation of the error
// Hessian in weight space: for every event the per-synapse sensitivities
// are collected into `sens` and the Hessian accumulated as sens*sensT
// (scaled by output*(1-output) for cross-entropy). The diagonal is then
// regularised - with the current regulators, or with 1e-6 to keep the
// matrix positive definite - and inverted in place.
// NOTE(review): src line 1519 (between SetError and the synapse loop) was
// lost in extraction; confirm against the upstream ROOT sources.
1506
1507void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1508 {
1509 Int_t numSynapses=fSynapses->GetEntriesFast();
1510 InvHessian.ResizeTo( numSynapses, numSynapses );
1511 InvHessian=0;
1512 TMatrixD sens(numSynapses,1);
1513 TMatrixD sensT(1,numSynapses);
1514 Int_t nEvents = GetNEvents();
1515 for (Int_t i=0;i<nEvents;i++) {
1516 GetEvent(i);
1517 double outputValue=GetMvaValue(); // force calculation
1518 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1520 for (Int_t j = 0; j < numSynapses; j++){
1521 TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1522 synapses->InitDelta();
1523 synapses->CalculateDelta();
1524 sens[j][0]=sensT[0][j]=synapses->GetDelta();
1525 }
1526 if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1527 else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1528 }
1529
1530 // TVectorD eValue(numSynapses);
1531 if (regulate) {
1532 for (Int_t i = 0; i < numSynapses; i++){
1533 InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1534 }
1535 }
1536 else {
1537 for (Int_t i = 0; i < numSynapses; i++){
1538 InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1539 }
1540 }
1541
1542 InvHessian.Invert();
1543
1544}
1545
1546 ////////////////////////////////////////////////////////////////////////////////
// NOTE(review): the signature (src 1548, presumably
// `Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )`
// per the base-class interface) and src 1565 were lost in extraction.
// Returns the MVA value from MethodANNBase; when error estimation is
// enabled and both error pointers are given, it additionally propagates
// the per-synapse sensitivities through the stored inverse Hessian
// (sigma^2 = sensT * fInvHessian * sens) and fills *errUpper/*errLower
// with the asymmetric one-sigma shifts of the transformed output.
1547
1549 {
1550 Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1551
1552 // no hessian (old training file) or no error requested
1553 if (!fCalculateErrors || errLower==0 || errUpper==0)
1554 return MvaValue;
1555
1556 Double_t MvaUpper,MvaLower,median,variance;
1557 Int_t numSynapses=fSynapses->GetEntriesFast();
1558 if (fInvHessian.GetNcols()!=numSynapses) {
1559 Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1560 }
1561 TMatrixD sens(numSynapses,1);
1562 TMatrixD sensT(1,numSynapses);
1563 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1564 //GetOutputNeuron()->SetError(1.);
1566 for (Int_t i = 0; i < numSynapses; i++){
1567 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1568 synapses->InitDelta();
1569 synapses->CalculateDelta();
1570 sensT[0][i]=synapses->GetDelta();
1571 }
1572 sens.Transpose(sensT);
1573 TMatrixD sig=sensT*fInvHessian*sens;
1574 variance=sig[0][0];
1575 median=GetOutputNeuron()->GetValue();
1576
// numerical noise can drive the quadratic form slightly negative
1577 if (variance<0) {
1578 Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1579 variance=0;
1580 }
1581 variance=sqrt(variance);
1582
1583 //upper
1584 MvaUpper=fOutput->Eval(median+variance);
1585 if(errUpper)
1586 *errUpper=MvaUpper-MvaValue;
1587
1588 //lower
1589 MvaLower=fOutput->Eval(median-variance);
1590 if(errLower)
1591 *errLower=MvaValue-MvaLower;
1592
1593 return MvaValue;
1594}
1595
1596
1597#ifdef MethodMLP_UseMinuit__
1598
1599////////////////////////////////////////////////////////////////////////////////
1600/// minimize using Minuit
1601
1602void TMVA::MethodMLP::MinuitMinimize()
1603{
1604 fNumberOfWeights = fSynapses->GetEntriesFast();
1605
1606 TFitter* tfitter = new TFitter( fNumberOfWeights );
1607
1608 // minuit-specific settings
1609 Double_t args[10];
1610
1611 // output level
1612 args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1613 tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1614 tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1615
1616 double w[54];
1617
1618 // init parameters
1619 for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1620 TString parName = TString::Format("w%i", ipar);
1621 tfitter->SetParameter( ipar,
1622 parName, w[ipar], 0.1, 0, 0 );
1623 }
1624
1625 // define the CFN function
1626 tfitter->SetFCN( &IFCN );
1627
1628 // define fit strategy
1629 args[0] = 2;
1630 tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1631
1632 // now do the fit !
1633 args[0] = 1e-04;
1634 tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1635
1636 Bool_t doBetter = kFALSE;
1637 Bool_t doEvenBetter = kFALSE;
1638 if (doBetter) {
1639 args[0] = 1e-04;
1640 tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1641
1642 if (doEvenBetter) {
1643 args[0] = 500;
1644 tfitter->ExecuteCommand( "MINOS", args, 1 );
1645 }
1646 }
1647}
1648
1649 ////////////////////////////////////////////////////////////////////////////////
1650 /// Evaluate the minimisation function
1651 ///
1652 /// Input parameters:
1653 /// - npars: number of currently variable parameters
1654 /// CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1655 /// - fitPars: array of (constant and variable) parameters
1656 /// - iflag: indicates what is to be calculated (see example below)
1657 /// - grad: array of gradients
1658 ///
1659 /// Output parameters:
1660 /// - f: the calculated function value.
1661 /// - grad: the (optional) vector of first derivatives).
// Static trampoline required by Minuit's C-style FCN interface: forwards
// to the member FCN of the instance registered via GetThisPtr().
1662
1663void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1664 {
1665 ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1666}
1667
1668TTHREAD_TLS(Int_t) nc = 0;
1669TTHREAD_TLS(double) minf = 1000000;
1670
1671void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1672{
1673 // first update the weights
1674 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1675 TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1676 synapse->SetWeight(fitPars[ipar]);
1677 }
1678
1679 // now compute the estimator
1680 f = CalculateEstimator();
1681
1682 nc++;
1683 if (f < minf) minf = f;
1684 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1685 Log() << kDEBUG << Endl;
1686 Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1687}
1688
1689 ////////////////////////////////////////////////////////////////////////////////
1690 /// global "this" pointer to be used in minuit
// Returns the static fgThis pointer consumed by the IFCN trampoline.
1691
1692TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1693 {
1694 return fgThis;
1695}
1696
1697#endif
1698
1699
1700 ////////////////////////////////////////////////////////////////////////////////
1701 /// write specific classifier response
// Pure delegation: the standalone-class code generation for the MLP is
// implemented entirely in the MethodANNBase base class.
1702
1703void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1704 {
1705 MethodANNBase::MakeClassSpecific(fout, className);
1706}
1707
1708 ////////////////////////////////////////////////////////////////////////////////
1709 /// get help message text
1710 ///
1711 /// typical length of text line:
1712 /// "|--------------------------------------------------------------|"
// NOTE(review): signature line (src 1714) lost in extraction; presumably
// `void TMVA::MethodMLP::GetHelpMessage() const` (declared in the class
// interface). Emits the user help text through the logger; colour markup
// is suppressed when options are being written to the reference file.
1713
1715 {
1716 TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1717 TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1718
1719 Log() << Endl;
1720 Log() << col << "--- Short description:" << colres << Endl;
1721 Log() << Endl;
1722 Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1723 Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1724 Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1725 Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1726 Log() << "signal and one background). " << Endl;
1727 Log() << Endl;
1728 Log() << col << "--- Performance optimisation:" << colres << Endl;
1729 Log() << Endl;
1730 Log() << "Neural networks are stable and performing for a large variety of " << Endl;
1731 Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1732 Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1733 Log() << "number of input variables that have only little discrimination power. " << Endl;
1734 Log() << "" << Endl;
1735 Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1736 Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1737 Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1738 Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1739 Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1740 Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1741 Log() << "competitive results)." << Endl;
1742 Log() << Endl;
1743 Log() << col << "Overtraining: " << colres
1744 << "only the TMlpANN performs an explicit separation of the" << Endl;
1745 Log() << "full training sample into independent training and validation samples." << Endl;
1746 Log() << "We have found that in most high-energy physics applications the " << Endl;
1747 Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1748 Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1749 Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1750 Log() << "the use of validation samples would only reduce the available training" << Endl;
1751 Log() << "information. However, if the performance on the training sample is " << Endl;
1752 Log() << "found to be significantly better than the one found with the inde-" << Endl;
1753 Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1754 Log() << "are printed to standard output at the end of each training job." << Endl;
1755 Log() << Endl;
1756 Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1757 Log() << Endl;
1758 Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1759 Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
1760 Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1761 Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1762 Log() << "in favour of more neurons in the first hidden layer." << Endl;
1763 Log() << "" << Endl;
1764 Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1765 Log() << "adjustable weights is small compared to the training sample size," << Endl;
1766 Log() << "using a large number of training samples should not lead to overtraining." << Endl;
1767}
1768
#define REGISTER_METHOD(CLASS)
for example
#define d(i)
Definition RSha256.hxx:102
#define f(i)
Definition RSha256.hxx:104
#define a(i)
Definition RSha256.hxx:99
#define e(i)
Definition RSha256.hxx:103
int Int_t
Signed integer 4 bytes (int).
Definition RtypesCore.h:59
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
Definition RtypesCore.h:60
bool Bool_t
Boolean (0=false, 1=true) (bool).
Definition RtypesCore.h:77
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
float Float_t
Float 4 bytes (float).
Definition RtypesCore.h:71
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
char name[80]
Definition TGX11.cxx:148
TMatrixT< Double_t > TMatrixD
Definition TMatrixDfwd.h:23
<div class="legacybox"><h2>Legacy Code</h2> TFitter is a legacy interface: there will be no bug fixes...
Definition TFitter.h:19
void SetFCN(void(*fcn)(Int_t &, Double_t *, Double_t &f, Double_t *, Int_t)) override
Specify the address of the fitting algorithm.
Definition TFitter.cxx:560
Int_t ExecuteCommand(const char *command, Double_t *args, Int_t nargs) override
Execute a fitter command; command : command string args : list of nargs command arguments.
Definition TFitter.cxx:88
Int_t SetParameter(Int_t ipar, const char *parname, Double_t value, Double_t verr, Double_t vlow, Double_t vhigh) override
set initial values for a parameter
Definition TFitter.cxx:587
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:878
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition TH1.cxx:3393
Bool_t WriteOptionsReference() const
Definition Config.h:65
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
const TString & GetOptions() const
MsgLogger & Log() const
Bool_t HasConverged(Bool_t withinConvergenceBand=kFALSE)
gives back true if the last "steps" steps have lead to an improvement of the "fitness" of the "indivi...
void SetCurrentValue(Float_t value)
Float_t Progress()
returns a float from 0 (just started) to 1 (finished)
Float_t fImprovement
! minimum improvement which counts as improvement
Int_t fSteps
! number of steps without improvement required for convergence
Class that contains all the data information.
Definition DataSetInfo.h:62
std::vector< Float_t > & GetTargets()
Definition Event.h:103
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition Event.cxx:389
UInt_t GetClass() const
Definition Event.h:86
Float_t GetTarget(UInt_t itgt) const
Definition Event.h:102
Base class for TMVA fitters.
Definition FitterBase.h:51
Double_t Run()
estimator function interface for fitting
Fitter using a Genetic Algorithm.
The TMVA::Interval Class.
Definition Interval.h:61
std::vector< TH1 * > fEpochMonHistW
std::vector< TH1 * > fEpochMonHistB
void ProcessOptions() override
do nothing specific at this moment
TNeuron * GetInputNeuron(Int_t index)
void WaitForKeyboard()
wait for keyboard input, for debugging
MethodANNBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor Note: Right now it is an option to choose the neuron input function,...
TNeuron * GetOutputNeuron(Int_t index=0)
TActivation * fOutput
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
void PrintMessage(TString message, Bool_t force=kFALSE) const
print messages, turn off printing by setting verbose and debug flag appropriately
std::vector< Int_t > fRegulatorIdx
std::vector< Double_t > fRegulators
std::vector< TH1 * > fEpochMonHistS
Int_t fRandomSeed
random seed for initial synapse weights
void ForceNetworkCalculations()
calculate input values to each neuron
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
get the mva value generated by the NN
void ForceNetworkInputs(const Event *ev, Int_t ignoreIndex=-1)
force the input values of the input neurons force the value for each input neuron
TMatrixD fInvHessian
zjh
virtual void PrintNetwork() const
print network representation, for debugging
void CreateWeightMonitoringHists(const TString &bulkname, std::vector< TH1 * > *hv=nullptr) const
const char * GetName() const override
Definition MethodBase.h:337
Double_t GetXmin(Int_t ivar) const
Definition MethodBase.h:359
Bool_t DoMulticlass() const
Definition MethodBase.h:442
Types::EAnalysisType GetAnalysisType() const
Definition MethodBase.h:440
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition MethodBase.h:689
UInt_t GetNTargets() const
Definition MethodBase.h:349
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition MethodBase.h:439
void ExitFromTraining()
Definition MethodBase.h:467
UInt_t GetNEvents() const
Definition MethodBase.h:419
Bool_t DoRegression() const
Definition MethodBase.h:441
const Event * GetEvent() const
Definition MethodBase.h:754
DataSetInfo & DataInfo() const
Definition MethodBase.h:413
UInt_t GetNvar() const
Definition MethodBase.h:347
Double_t GetXmax(Int_t ivar) const
Definition MethodBase.h:360
UInt_t fIPyCurrentIter
Definition MethodBase.h:453
Bool_t IsSilentFile() const
Definition MethodBase.h:382
void SetSignalReferenceCut(Double_t cut)
Definition MethodBase.h:367
DataSet * Data() const
Definition MethodBase.h:412
Bool_t IsNormalised() const
Definition MethodBase.h:499
IPythonInteractive * fInteractive
Definition MethodBase.h:451
Multilayer Perceptron class built off of MethodANNBase.
Definition MethodMLP.h:69
Int_t fResetStep
reset time (how often we clear hessian matrix)
Definition MethodMLP.h:185
bool fCalculateErrors
compute inverse hessian matrix at the end of the training
Definition MethodMLP.h:165
std::vector< std::pair< Float_t, Float_t > > * fDeviationsFromTargets
deviation from the targets, event weight
Definition MethodMLP.h:204
Double_t fTau
line search variable
Definition MethodMLP.h:184
Int_t fGA_SC_rate
GA settings: SC_rate.
Definition MethodMLP.h:200
void GetHelpMessage() const override
get help message text
Float_t fWeightRange
suppress outliers for the estimator calculation
Definition MethodMLP.h:206
Float_t fSamplingWeight
changing factor for event weights when sampling is turned on
Definition MethodMLP.h:178
Int_t fBatchSize
batch size, only matters if in batch learning mode
Definition MethodMLP.h:192
void BackPropagationMinimize(Int_t nEpochs)
minimize estimator / train network with back propagation algorithm
Double_t GetMSEErr(const Event *ev, UInt_t index=0)
zjh
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
bool fUseRegulator
zjh
Definition MethodMLP.h:164
void DeclareOptions() override
define the options (their key words) that can be set in the option string
void AdjustSynapseWeights()
just adjust the synapse weights (should be called in batch mode)
std::vector< Double_t > fPriorDev
zjh
Definition MethodMLP.h:167
TString fBpModeS
backprop learning mode option string (sequential or batch)
Definition MethodMLP.h:191
void SteepestDir(TMatrixD &Dir)
void TrainOneEpoch()
train network over a single epoch/cycle of events
TString fTrainMethodS
training method option param
Definition MethodMLP.h:174
Int_t fGA_nsteps
GA settings: number of steps.
Definition MethodMLP.h:197
Double_t fPrior
zjh
Definition MethodMLP.h:166
Bool_t GetHessian(TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta)
Double_t ComputeEstimator(std::vector< Double_t > &parameters)
this function is called by GeneticANN for GA optimization
Int_t fUpdateLimit
zjh
Definition MethodMLP.h:171
static const Bool_t fgPRINT_BATCH
debug flags
Definition MethodMLP.h:217
void InitializeLearningRates()
initialize learning rates of synapses, used only by back propagation
Int_t fGA_preCalc
GA settings: number of pre-calc steps.
Definition MethodMLP.h:198
void CalculateNeuronDeltas()
have each neuron calculate its delta by back propagation
Int_t fTestRate
test for overtraining performed at every fTestRate-th epoch
Definition MethodMLP.h:193
Double_t fDecayRate
decay rate for above learning rate
Definition MethodMLP.h:189
ETrainingMethod fTrainingMethod
method of training, BP or GA
Definition MethodMLP.h:173
Double_t EstimatorFunction(std::vector< Double_t > &parameters) override
interface to the estimate
EBPTrainingMode fBPMode
backprop learning mode (sequential or batch)
Definition MethodMLP.h:190
Double_t DerivDir(TMatrixD &Dir)
Double_t fGA_SC_factor
GA settings: SC_factor.
Definition MethodMLP.h:201
Double_t GetCEErr(const Event *ev, UInt_t index=0)
zjh
virtual ~MethodMLP()
destructor; nothing to be done
Int_t fGA_SC_steps
GA settings: SC_steps.
Definition MethodMLP.h:199
void SetDir(TMatrixD &Hessian, TMatrixD &Dir)
void Shuffle(Int_t *index, Int_t n)
Input:
Bool_t fSamplingTraining
The training sample is sampled.
Definition MethodMLP.h:179
void SimulateEvent(const Event *ev)
void SetDirWeights(std::vector< Double_t > &Origin, TMatrixD &Dir, Double_t alpha)
void SetGammaDelta(TMatrixD &Gamma, TMatrixD &Delta, std::vector< Double_t > &Buffer)
Double_t fLearnRate
learning rate for synapse weight adjustments
Definition MethodMLP.h:188
void UpdatePriors()
zjh
void GetApproxInvHessian(TMatrixD &InvHessian, bool regulate=true)
rank-1 approximation, neglect 2nd derivatives. //zjh
void BFGSMinimize(Int_t nEpochs)
train network with BFGS algorithm
void UpdateSynapses()
update synapse error fields and adjust the weights (if in sequential mode)
Float_t fSamplingFraction
fraction of events which is sampled for training
Definition MethodMLP.h:176
void TrainOneEvent(Int_t ievt)
train network over a single event this uses the new event model
Double_t GetDesiredOutput(const Event *ev)
get the desired output of this event
void GeneticMinimize()
create genetics class similar to GeneticCut give it vector of parameter ranges (parameters = weights)...
Bool_t fSamplingTesting
The testing sample is sampled.
Definition MethodMLP.h:180
Double_t GetError()
Double_t fLastAlpha
line search variable
Definition MethodMLP.h:183
void Init() override
default initializations
Float_t fSamplingEpoch
fraction of epochs where sampling is used
Definition MethodMLP.h:177
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
MLP can handle classification with 2 classes and regression with one regression-target.
void DecaySynapseWeights(Bool_t lateEpoch)
decay synapse weights in last 10 epochs, lower learning rate even more to find a good minimum
void TrainOneEventFast(Int_t ievt, Float_t *&branchVar, Int_t &type)
fast per-event training
Bool_t fEpochMon
create and fill epoch-wise monitoring histograms (makes outputfile big!)
Definition MethodMLP.h:194
void UpdateNetwork(Double_t desired, Double_t eventWeight=1.0)
update the network based on how closely the output matched the desired output
MethodMLP(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor
Definition MethodMLP.cxx:89
void UpdateRegulators()
zjh
Bool_t LineSearch(TMatrixD &Dir, std::vector< Double_t > &Buffer, Double_t *dError=nullptr)
zjh
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
get the mva value generated by the NN
void Train() override
Double_t CalculateEstimator(Types::ETreeType treeType=Types::kTraining, Int_t iEpoch=-1)
calculate the estimator that training is attempting to minimize
void ProcessOptions() override
process user options
static const Bool_t fgPRINT_SEQ
debug flags
Definition MethodMLP.h:216
Neuron class used by TMVA artificial neural network methods.
Definition TNeuron.h:49
void AdjustSynapseWeights()
adjust the pre-synapses' weights for each neuron (input neuron has no pre-synapse) this method should...
Definition TNeuron.cxx:262
void ForceValue(Double_t value)
force the value, typically for input and bias neurons
Definition TNeuron.cxx:83
void UpdateSynapsesSequential()
update the pre-synapses for each neuron (input neuron has no pre-synapse) this method should only be ...
Definition TNeuron.cxx:241
void UpdateSynapsesBatch()
update and adjust the pre-synapses for each neuron (input neuron has no pre-synapse) this method shou...
Definition TNeuron.cxx:223
void CalculateDelta()
calculate error field
Definition TNeuron.cxx:114
Synapse class used by TMVA artificial neural network methods.
Definition TSynapse.h:42
void SetWeight(Double_t weight)
set synapse weight
Definition TSynapse.cxx:67
Double_t GetWeight()
Definition TSynapse.h:53
void SetDEDw(Double_t DEDw)
Definition TSynapse.h:85
void SetLearningRate(Double_t rate)
Definition TSynapse.h:56
Double_t GetDEDw()
Definition TSynapse.h:86
void InitDelta()
Definition TSynapse.h:83
Double_t GetDelta()
Definition TSynapse.h:87
void DecayLearningRate(Double_t rate)
Definition TSynapse.h:62
void CalculateDelta()
calculate/adjust the error field for this synapse
Definition TSynapse.cxx:107
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition Timer.cxx:201
Double_t NormVariable(Double_t x, Double_t xmin, Double_t xmax)
normalise to output range: [-1, 1]
Definition Tools.cxx:111
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:803
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kMulticlass
Definition Types.h:129
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
@ kDEBUG
Definition Types.h:56
virtual TMatrixTBase< Element > & UnitMatrix()
Make a unit matrix (matrix need not be a square one).
TMatrixT< Element > & Transpose(const TMatrixT< Element > &source)
Transpose matrix source.
TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1) override
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
TMatrixT< Element > & Invert(Double_t *det=nullptr)
Invert the matrix and calculate its determinant.
An array of TObjects.
Definition TObjArray.h:31
Int_t GetEntriesFast() const
Definition TObjArray.h:58
TObject * At(Int_t idx) const override
Definition TObjArray.h:170
Basic string class.
Definition TString.h:138
const char * Data() const
Definition TString.h:384
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2385
void SetWeight(Double_t w)
Sets the weight of the synapse.
Definition TSynapse.cxx:101
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
Config & gConfig()
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Definition TMath.h:720
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition TMath.h:767
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:673