Logo ROOT  
Reference Guide
MethodMLP.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodMLP *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * ANN Multilayer Perceptron class for the discrimination of signal *
12 * from background. BFGS implementation based on TMultiLayerPerceptron *
13 * class from ROOT (http://root.cern.ch). *
14 * *
15 * Authors (alphabetical): *
16 * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18 * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19 * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20 * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21 * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22 * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25 * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26 * *
27 * Copyright (c) 2005-2011: *
28 * CERN, Switzerland *
29 * U. of Victoria, Canada *
30 * MPI-K Heidelberg, Germany *
31 * U. of Bonn, Germany *
32 * *
33 * Redistribution and use in source and binary forms, with or without *
34 * modification, are permitted according to the terms listed in LICENSE *
35 * (http://tmva.sourceforge.net/LICENSE) *
36 **********************************************************************************/
37
38/*! \class TMVA::MethodMLP
39\ingroup TMVA
40
41Multilayer Perceptron class built off of MethodANNBase
42
43*/
44
45#include "TMVA/MethodMLP.h"
46
47#include "TMVA/Config.h"
48#include "TMVA/Configurable.h"
51#include "TMVA/DataSet.h"
52#include "TMVA/DataSetInfo.h"
53#include "TMVA/FitterBase.h"
54#include "TMVA/GeneticFitter.h"
55#include "TMVA/IFitterTarget.h"
56#include "TMVA/IMethod.h"
57#include "TMVA/Interval.h"
58#include "TMVA/MethodANNBase.h"
59#include "TMVA/MsgLogger.h"
60#include "TMVA/TNeuron.h"
61#include "TMVA/TSynapse.h"
62#include "TMVA/Timer.h"
63#include "TMVA/Tools.h"
64#include "TMVA/Types.h"
65
66#include "TH1.h"
67#include "TString.h"
68#include "TFitter.h"
69#include "TMatrixD.h"
70#include "TMath.h"
71
72#include <iostream>
73#include <cmath>
74#include <vector>
75
76#ifdef MethodMLP_UseMinuit__
77TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
78Bool_t MethodMLP_UseMinuit = kTRUE;
79#endif
80
82
84
85 using std::vector;
86
87////////////////////////////////////////////////////////////////////////////////
88/// standard constructor
89
91 const TString& methodTitle,
92 DataSetInfo& theData,
93 const TString& theOption)
94 : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
95 fUseRegulator(false), fCalculateErrors(false),
96 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
97 fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
98 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
99 fSamplingTraining(false), fSamplingTesting(false),
100 fLastAlpha(0.0), fTau(0.),
101 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
102 fBPMode(kSequential), fBpModeS("None"),
103 fBatchSize(0), fTestRate(0), fEpochMon(false),
104 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
105 fGA_SC_rate(0), fGA_SC_factor(0.0),
106 fDeviationsFromTargets(0),
107 fWeightRange (1.0)
108{
109
110}
111
112////////////////////////////////////////////////////////////////////////////////
113/// constructor from a weight file
114
116 const TString& theWeightFile)
117 : MethodANNBase( Types::kMLP, theData, theWeightFile),
118 fUseRegulator(false), fCalculateErrors(false),
119 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
120 fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
121 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
122 fSamplingTraining(false), fSamplingTesting(false),
123 fLastAlpha(0.0), fTau(0.),
124 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
125 fBPMode(kSequential), fBpModeS("None"),
126 fBatchSize(0), fTestRate(0), fEpochMon(false),
127 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
128 fGA_SC_rate(0), fGA_SC_factor(0.0),
129 fDeviationsFromTargets(0),
130 fWeightRange (1.0)
131{
132}
133
134////////////////////////////////////////////////////////////////////////////////
135/// destructor
136/// nothing to be done
137
139{
140}
141
143{
144 Train(NumCycles());
145}
146
147
148
149////////////////////////////////////////////////////////////////////////////////
150/// MLP can handle classification with 2 classes and regression with one regression-target
151
153{
154 if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
155 if (type == Types::kMulticlass ) return kTRUE;
156 if (type == Types::kRegression ) return kTRUE;
157
158 return kFALSE;
159}
160
161////////////////////////////////////////////////////////////////////////////////
162/// default initializations
163
165{
166 // the minimum requirement to declare an event signal-like
167 SetSignalReferenceCut( 0.5 );
168#ifdef MethodMLP_UseMinuit__
169 fgThis = this;
170#endif
171}
172
173////////////////////////////////////////////////////////////////////////////////
174/// define the options (their key words) that can be set in the option string
175///
176/// know options:
177///
178/// - TrainingMethod <string> Training method
179/// available values are:
180/// - BP Back-Propagation <default>
181/// - GA Genetic Algorithm (takes a LONG time)
182///
183/// - LearningRate <float> NN learning rate parameter
184/// - DecayRate <float> Decay rate for learning parameter
185/// - TestRate <int> Test for overtraining performed at each #th epochs
186///
187/// - BPMode <string> Back-propagation learning mode
188/// available values are:
189/// - sequential <default>
190/// - batch
191///
192/// - BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
193/// - -1 for BatchSize=number_of_events
194
196{
197 DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
198 "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
199 AddPreDefVal(TString("BP"));
200 AddPreDefVal(TString("GA"));
201 AddPreDefVal(TString("BFGS"));
202
203 DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
204 DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
205 DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
206 DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
207
208 DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
209 DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
210 DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance"," The sampling weights of events in epochs which successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
211
212 DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
213 DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
214
215 DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
216 DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
217
218 DeclareOptionRef(fBpModeS="sequential", "BPMode",
219 "Back-propagation learning mode: sequential or batch");
220 AddPreDefVal(TString("sequential"));
221 AddPreDefVal(TString("batch"));
222
223 DeclareOptionRef(fBatchSize=-1, "BatchSize",
224 "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
225
226 DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
227 "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
228
229 DeclareOptionRef(fSteps=-1, "ConvergenceTests",
230 "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
231
232 DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
233 "Use regulator to avoid over-training"); //zjh
234 DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
235 "Maximum times of regulator update"); //zjh
236 DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
237 "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
238
239 DeclareOptionRef(fWeightRange=1.0, "WeightRange",
240 "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
241
242}
243
244////////////////////////////////////////////////////////////////////////////////
245/// process user options
246
248{
250
251
252 if (IgnoreEventsWithNegWeightsInTraining()) {
253 Log() << kINFO
254 << "Will ignore negative events in training!"
255 << Endl;
256 }
257
258
259 if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
260 else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
261 else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
262
263 if (fBpModeS == "sequential") fBPMode = kSequential;
264 else if (fBpModeS == "batch") fBPMode = kBatch;
265
266 // InitializeLearningRates();
267
268 if (fBPMode == kBatch) {
269 Data()->SetCurrentType(Types::kTraining);
270 Int_t numEvents = Data()->GetNEvents();
271 if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
272 }
273}
274
275////////////////////////////////////////////////////////////////////////////////
276/// initialize learning rates of synapses, used only by back propagation
277
279{
280 Log() << kDEBUG << "Initialize learning rates" << Endl;
281 TSynapse *synapse;
282 Int_t numSynapses = fSynapses->GetEntriesFast();
283 for (Int_t i = 0; i < numSynapses; i++) {
284 synapse = (TSynapse*)fSynapses->At(i);
285 synapse->SetLearningRate(fLearnRate);
286 }
287}
288
289////////////////////////////////////////////////////////////////////////////////
290/// calculate the estimator that training is attempting to minimize
291
293{
294 // sanity check
295 if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
296 Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
297 }
298
299 Types::ETreeType saveType = Data()->GetCurrentType();
300 Data()->SetCurrentType(treeType);
301
302 // if epochs are counted create monitoring histograms (only available for classification)
303 TString type = (treeType == Types::kTraining ? "train" : "test");
304 TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
305 TString nameB = name + "_B";
306 TString nameS = name + "_S";
307 Int_t nbin = 100;
308 Float_t limit = 2;
309 TH1* histS = 0;
310 TH1* histB = 0;
311 if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
312 histS = new TH1F( nameS, nameS, nbin, -limit, limit );
313 histB = new TH1F( nameB, nameB, nbin, -limit, limit );
314 }
315
316 Double_t estimator = 0;
317
318 // loop over all training events
319 Int_t nEvents = GetNEvents();
320 UInt_t nClasses = DataInfo().GetNClasses();
321 UInt_t nTgts = DataInfo().GetNTargets();
322
323
324 Float_t sumOfWeights = 0.f;
325 if( fWeightRange < 1.f ){
326 fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
327 }
328
329 for (Int_t i = 0; i < nEvents; i++) {
330
331 const Event* ev = GetEvent(i);
332
333 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
334 && (saveType == Types::kTraining)){
335 continue;
336 }
337
338 Double_t w = ev->GetWeight();
339
340 ForceNetworkInputs( ev );
341 ForceNetworkCalculations();
342
343 Double_t d = 0, v = 0;
344 if (DoRegression()) {
345 for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
346 v = GetOutputNeuron( itgt )->GetActivationValue();
347 Double_t targetValue = ev->GetTarget( itgt );
348 Double_t dt = v - targetValue;
349 d += (dt*dt);
350 }
351 estimator += d*w;
352 } else if (DoMulticlass() ) {
353 UInt_t cls = ev->GetClass();
354 if (fEstimator==kCE){
355 Double_t norm(0);
356 for (UInt_t icls = 0; icls < nClasses; icls++) {
357 Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
358 norm += exp( activationValue );
359 if(icls==cls)
360 d = exp( activationValue );
361 }
362 d = -TMath::Log(d/norm);
363 }
364 else{
365 for (UInt_t icls = 0; icls < nClasses; icls++) {
366 Double_t desired = (icls==cls) ? 1.0 : 0.0;
367 v = GetOutputNeuron( icls )->GetActivationValue();
368 d = (desired-v)*(desired-v);
369 }
370 }
371 estimator += d*w; //zjh
372 } else {
373 Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
374 v = GetOutputNeuron()->GetActivationValue();
375 if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
376 else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
377 estimator += d*w; //zjh
378 }
379
380 if( fDeviationsFromTargets )
381 fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
382
383 sumOfWeights += w;
384
385
386 // fill monitoring histograms
387 if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
388 else if (histB != 0) histB->Fill( float(v), float(w) );
389 }
390
391
392 if( fDeviationsFromTargets ) {
393 std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
394
395 Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
396 estimator = 0.f;
397
398 Float_t weightRangeCut = fWeightRange*sumOfWeights;
399 Float_t weightSum = 0.f;
400 for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
401 float deviation = (*itDev).first;
402 float devWeight = (*itDev).second;
403 weightSum += devWeight; // add the weight of this event
404 if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
405 estimator += devWeight*deviation;
406 }
407 }
408
409 sumOfWeights = sumOfWeightsInRange;
410 delete fDeviationsFromTargets;
411 }
412
413 if (histS != 0) fEpochMonHistS.push_back( histS );
414 if (histB != 0) fEpochMonHistB.push_back( histB );
415
416 //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
417 //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
418 //else estimator = estimator*0.5/Float_t(nEvents);
419 estimator = estimator/Float_t(sumOfWeights);
420
421
422 //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
423
424 Data()->SetCurrentType( saveType );
425
426 // provide epoch-wise monitoring
427 if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
428 CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
429 }
430
431 return estimator;
432}
433
434////////////////////////////////////////////////////////////////////////////////
435
437{
438 if (fNetwork == 0) {
439 //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
440 Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
441 SetAnalysisType(GetAnalysisType());
442 }
443 Log() << kDEBUG << "reinitialize learning rates" << Endl;
444 InitializeLearningRates();
445 Log() << kHEADER;
446 PrintMessage("Training Network");
447 Log() << Endl;
448 Int_t nEvents=GetNEvents();
449 Int_t nSynapses=fSynapses->GetEntriesFast();
450 if (nSynapses>nEvents)
451 Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
452
453 fIPyMaxIter = nEpochs;
454 if (fInteractive && fInteractive->NotInitialized()){
455 std::vector<TString> titles = {"Error on training set", "Error on test set"};
456 fInteractive->Init(titles);
457 }
458
459#ifdef MethodMLP_UseMinuit__
460 if (useMinuit) MinuitMinimize();
461#else
462 if (fTrainingMethod == kGA) GeneticMinimize();
463 else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
464 else BackPropagationMinimize(nEpochs);
465#endif
466
467 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
468 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
469 if (fUseRegulator){
470 Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
471 UpdateRegulators();
472 Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
473 }
474
475 if( fCalculateErrors || fUseRegulator )
476 {
477 Int_t numSynapses=fSynapses->GetEntriesFast();
478 fInvHessian.ResizeTo(numSynapses,numSynapses);
479 GetApproxInvHessian( fInvHessian ,false);
480 }
481 ExitFromTraining();
482}
483
484////////////////////////////////////////////////////////////////////////////////
485/// train network with BFGS algorithm
486
488{
489 Timer timer( (fSteps>0?100:nEpochs), GetName() );
490
491 // create histograms for overtraining monitoring
492 Int_t nbinTest = Int_t(nEpochs/fTestRate);
493 if(!IsSilentFile())
494 {
495 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
496 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
497 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
498 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
499 }
500
501 Int_t nSynapses = fSynapses->GetEntriesFast();
502 Int_t nWeights = nSynapses;
503
504 for (Int_t i=0;i<nSynapses;i++) {
505 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
506 synapse->SetDEDw(0.0);
507 }
508
509 std::vector<Double_t> buffer( nWeights );
510 for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
511
512 TMatrixD Dir ( nWeights, 1 );
513 TMatrixD Hessian ( nWeights, nWeights );
514 TMatrixD Gamma ( nWeights, 1 );
515 TMatrixD Delta ( nWeights, 1 );
516 Int_t RegUpdateCD=0; //zjh
517 Int_t RegUpdateTimes=0; //zjh
518 Double_t AccuError=0;
519
520 Double_t trainE = -1;
521 Double_t testE = -1;
522
523 fLastAlpha = 0.;
524
525 if(fSamplingTraining || fSamplingTesting)
526 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
527
528 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
529 timer.DrawProgressBar( 0 );
530
531 // start training cycles (epochs)
532 for (Int_t i = 0; i < nEpochs; i++) {
533
534 if (fExitFromTraining) break;
535 fIPyCurrentIter = i;
536 if (Float_t(i)/nEpochs < fSamplingEpoch) {
537 if ((i+1)%fTestRate == 0 || (i == 0)) {
538 if (fSamplingTraining) {
539 Data()->SetCurrentType( Types::kTraining );
540 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
541 Data()->CreateSampling();
542 }
543 if (fSamplingTesting) {
544 Data()->SetCurrentType( Types::kTesting );
545 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
546 Data()->CreateSampling();
547 }
548 }
549 }
550 else {
551 Data()->SetCurrentType( Types::kTraining );
552 Data()->InitSampling(1.0,1.0);
553 Data()->SetCurrentType( Types::kTesting );
554 Data()->InitSampling(1.0,1.0);
555 }
556 Data()->SetCurrentType( Types::kTraining );
557
558 //zjh
559 if (fUseRegulator) {
560 UpdatePriors();
561 RegUpdateCD++;
562 }
563 //zjh
564
565 SetGammaDelta( Gamma, Delta, buffer );
566
567 if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
568 SteepestDir( Dir );
569 Hessian.UnitMatrix();
570 RegUpdateCD=0; //zjh
571 }
572 else {
573 if (GetHessian( Hessian, Gamma, Delta )) {
574 SteepestDir( Dir );
575 Hessian.UnitMatrix();
576 RegUpdateCD=0; //zjh
577 }
578 else SetDir( Hessian, Dir );
579 }
580
581 Double_t dError=0; //zjh
582 if (DerivDir( Dir ) > 0) {
583 SteepestDir( Dir );
584 Hessian.UnitMatrix();
585 RegUpdateCD=0; //zjh
586 }
587 if (LineSearch( Dir, buffer, &dError )) { //zjh
588 Hessian.UnitMatrix();
589 SteepestDir( Dir );
590 RegUpdateCD=0; //zjh
591 if (LineSearch(Dir, buffer, &dError)) { //zjh
592 i = nEpochs;
593 Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
594 }
595 }
596
597 //zjh+
598 if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
599 AccuError+=dError;
600
601 if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
602 Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
603 UpdateRegulators();
604 Hessian.UnitMatrix();
605 RegUpdateCD=0;
606 RegUpdateTimes++;
607 AccuError=0;
608 }
609 //zjh-
610
611 // monitor convergence of training and control sample
612 if ((i+1)%fTestRate == 0) {
613 //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
614 //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
615 trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
616 testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
617 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
618 if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
619 {
620 fEstimatorHistTrain->Fill( i+1, trainE );
621 fEstimatorHistTest ->Fill( i+1, testE );
622 }
623 Bool_t success = kFALSE;
624 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
625 success = kTRUE;
626 }
627 Data()->EventResult( success );
628
629 SetCurrentValue( testE );
630 if (HasConverged()) {
631 if (Float_t(i)/nEpochs < fSamplingEpoch) {
632 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
633 i = newEpoch;
634 ResetConvergenceCounter();
635 }
636 else break;
637 }
638 }
639
640 // draw progress
641 TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
642 if (fSteps > 0) {
643 Float_t progress = 0;
644 if (Float_t(i)/nEpochs < fSamplingEpoch)
645 // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
646 progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
647 else
648 {
649 // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
650 progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
651 }
652 Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
653 if (progress2>progress) progress=progress2; //zjh
654 timer.DrawProgressBar( Int_t(progress), convText );
655 }
656 else {
657 Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
658 if (progress<i) progress=i; //zjh
659 timer.DrawProgressBar( progress, convText ); //zjh
660 }
661
662 // some verbose output
663 if (fgPRINT_SEQ) {
664 PrintNetwork();
665 WaitForKeyboard();
666 }
667 }
668}
669
670////////////////////////////////////////////////////////////////////////////////
671
672void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
673{
674 Int_t nWeights = fSynapses->GetEntriesFast();
675
676 Int_t IDX = 0;
677 Int_t nSynapses = fSynapses->GetEntriesFast();
678 for (Int_t i=0;i<nSynapses;i++) {
679 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
680 Gamma[IDX++][0] = -synapse->GetDEDw();
681 }
682
683 for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
684
685 ComputeDEDw();
686
687 IDX = 0;
688 for (Int_t i=0;i<nSynapses;i++)
689 {
690 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
691 Gamma[IDX++][0] += synapse->GetDEDw();
692 }
693}
694
695////////////////////////////////////////////////////////////////////////////////
696
698{
699 Int_t nSynapses = fSynapses->GetEntriesFast();
700 for (Int_t i=0;i<nSynapses;i++) {
701 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
702 synapse->SetDEDw( 0.0 );
703 }
704
705 Int_t nEvents = GetNEvents();
706 Int_t nPosEvents = nEvents;
707 for (Int_t i=0;i<nEvents;i++) {
708
709 const Event* ev = GetEvent(i);
710 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
711 && (Data()->GetCurrentType() == Types::kTraining)){
712 --nPosEvents;
713 continue;
714 }
715
716 SimulateEvent( ev );
717
718 for (Int_t j=0;j<nSynapses;j++) {
719 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
720 synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
721 }
722 }
723
724 for (Int_t i=0;i<nSynapses;i++) {
725 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
726 Double_t DEDw=synapse->GetDEDw(); //zjh
727 if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
728 synapse->SetDEDw( DEDw / nPosEvents ); //zjh
729 }
730}
731
732////////////////////////////////////////////////////////////////////////////////
733
735{
736 Double_t eventWeight = ev->GetWeight();
737
738 ForceNetworkInputs( ev );
739 ForceNetworkCalculations();
740
741 if (DoRegression()) {
742 UInt_t ntgt = DataInfo().GetNTargets();
743 for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
744 Double_t desired = ev->GetTarget(itgt);
745 Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
746 GetOutputNeuron( itgt )->SetError(error);
747 }
748 } else if (DoMulticlass()) {
749 UInt_t nClasses = DataInfo().GetNClasses();
750 UInt_t cls = ev->GetClass();
751 for (UInt_t icls = 0; icls < nClasses; icls++) {
752 Double_t desired = ( cls==icls ? 1.0 : 0.0 );
753 Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
754 GetOutputNeuron( icls )->SetError(error);
755 }
756 } else {
757 Double_t desired = GetDesiredOutput( ev );
758 Double_t error=-1; //zjh
759 if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
760 else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
761 GetOutputNeuron()->SetError(error);
762 }
763
764 CalculateNeuronDeltas();
765 for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
766 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
767 synapse->InitDelta();
768 synapse->CalculateDelta();
769 }
770}
771
772////////////////////////////////////////////////////////////////////////////////
773
775{
776 Int_t IDX = 0;
777 Int_t nSynapses = fSynapses->GetEntriesFast();
778
779 for (Int_t i=0;i<nSynapses;i++) {
780 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
781 Dir[IDX++][0] = -synapse->GetDEDw();
782 }
783}
784
785////////////////////////////////////////////////////////////////////////////////
786
788{
790 if ((Double_t) gd[0][0] == 0.) return kTRUE;
791 TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
794 Double_t a = 1 / (Double_t) gd[0][0];
795 Double_t f = 1 + ((Double_t)gHg[0][0]*a);
797 res *= f;
798 res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
800 res *= a;
801 Hessian += res;
802
803 return kFALSE;
804}
805
806////////////////////////////////////////////////////////////////////////////////
807
809{
810 Int_t IDX = 0;
811 Int_t nSynapses = fSynapses->GetEntriesFast();
812 TMatrixD DEDw(nSynapses, 1);
813
814 for (Int_t i=0;i<nSynapses;i++) {
815 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
816 DEDw[IDX++][0] = synapse->GetDEDw();
817 }
818
819 dir = Hessian * DEDw;
820 for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
821}
822
823////////////////////////////////////////////////////////////////////////////////
824
826{
827 Int_t IDX = 0;
828 Int_t nSynapses = fSynapses->GetEntriesFast();
829 Double_t Result = 0.0;
830
831 for (Int_t i=0;i<nSynapses;i++) {
832 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
833 Result += Dir[IDX++][0] * synapse->GetDEDw();
834 }
835 return Result;
836}
837
838////////////////////////////////////////////////////////////////////////////////
839
840Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
841{
842 Int_t IDX = 0;
843 Int_t nSynapses = fSynapses->GetEntriesFast();
844 Int_t nWeights = nSynapses;
845
846 std::vector<Double_t> Origin(nWeights);
847 for (Int_t i=0;i<nSynapses;i++) {
848 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
849 Origin[i] = synapse->GetWeight();
850 }
851
852 Double_t err1 = GetError();
853 Double_t errOrigin=err1;//zjh
854 Double_t alpha1 = 0.;
855 Double_t alpha2 = fLastAlpha;
856
857
858 if (alpha2 < 0.01) alpha2 = 0.01;
859 else if (alpha2 > 2.0) alpha2 = 2.0;
860 Double_t alpha_original = alpha2;
861 Double_t alpha3 = alpha2;
862
863 SetDirWeights( Origin, Dir, alpha2 );
864 Double_t err2 = GetError();
865 //Double_t err2 = err1;
866 Double_t err3 = err2;
867 Bool_t bingo = kFALSE;
868
869
870 if (err1 > err2) {
871 for (Int_t i=0;i<100;i++) {
872 alpha3 *= fTau;
873 SetDirWeights(Origin, Dir, alpha3);
874 err3 = GetError();
875 if (err3 > err2) {
876 bingo = kTRUE;
877 break;
878 }
879 alpha1 = alpha2;
880 err1 = err2;
881 alpha2 = alpha3;
882 err2 = err3;
883 }
884 if (!bingo) {
885 SetDirWeights(Origin, Dir, 0.);
886 return kTRUE;
887 }
888 }
889 else {
890 for (Int_t i=0;i<100;i++) {
891 alpha2 /= fTau;
892 if (i==50) {
893 Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
894 alpha2 = -alpha_original;
895 }
896 SetDirWeights(Origin, Dir, alpha2);
897 err2 = GetError();
898 if (err1 > err2) {
899 bingo = kTRUE;
900 break;
901 }
902 alpha3 = alpha2;
903 err3 = err2;
904 }
905 if (!bingo) {
906 SetDirWeights(Origin, Dir, 0.);
907 Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
908 fLastAlpha = 0.05;
909 return kTRUE;
910 }
911 }
912
913 if (alpha1>0 && alpha2>0 && alpha3 > 0) {
914 fLastAlpha = 0.5 * (alpha1 + alpha3 -
915 (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
916 - ( err2 - err1 ) / (alpha2 - alpha1 )));
917 }
918 else {
919 fLastAlpha = alpha2;
920 }
921
922 fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
923
924 SetDirWeights(Origin, Dir, fLastAlpha);
925
926 // leaving these lines uncommented is a heavy price to pay for only a warning message
927 // (which shouldn't appear anyway)
928 // --> about 15% of time is spent in the final GetError().
929 //
930 Double_t finalError = GetError();
931 if (finalError > err1) {
932 Log() << kWARNING << "Line search increased error! Something is wrong."
933 << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
934 << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
935 }
936
937 for (Int_t i=0;i<nSynapses;i++) {
938 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
939 buffer[IDX] = synapse->GetWeight() - Origin[IDX];
940 IDX++;
941 }
942
943 if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
944
945 return kFALSE;
946}
947
948////////////////////////////////////////////////////////////////////////////////
949
950void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
951{
952 Int_t IDX = 0;
953 Int_t nSynapses = fSynapses->GetEntriesFast();
954
955 for (Int_t i=0;i<nSynapses;i++) {
956 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
957 synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
958 IDX++;
959 }
960 if (fUseRegulator) UpdatePriors();//zjh
961}
962
963
964////////////////////////////////////////////////////////////////////////////////
965
967{
968 Int_t nEvents = GetNEvents();
969 UInt_t ntgts = GetNTargets();
970 Double_t Result = 0.;
971
972 for (Int_t i=0;i<nEvents;i++) {
973 const Event* ev = GetEvent(i);
974
975 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
976 && (Data()->GetCurrentType() == Types::kTraining)){
977 continue;
978 }
979 SimulateEvent( ev );
980
981 Double_t error = 0.;
982 if (DoRegression()) {
983 for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
984 error += GetMSEErr( ev, itgt );//zjh
985 }
986 } else if ( DoMulticlass() ){
987 for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
988 error += GetMSEErr( ev, icls );
989 }
990 } else {
991 if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
992 else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
993 }
994 Result += error * ev->GetWeight();
995 }
996 if (fUseRegulator) Result+=fPrior; //zjh
997 if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
998 return Result;
999}
1000
1001////////////////////////////////////////////////////////////////////////////////
1002
1004{
1005 Double_t error = 0;
1006 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1007 Double_t target = 0;
1008 if (DoRegression()) target = ev->GetTarget( index );
1009 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1010 else target = GetDesiredOutput( ev );
1011
1012 error = 0.5*(output-target)*(output-target); //zjh
1013
1014 return error;
1015
1016}
1017
1018////////////////////////////////////////////////////////////////////////////////
1019
1021{
1022 Double_t error = 0;
1023 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1024 Double_t target = 0;
1025 if (DoRegression()) target = ev->GetTarget( index );
1026 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1027 else target = GetDesiredOutput( ev );
1028
1029 error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1030
1031 return error;
1032}
1033
1034////////////////////////////////////////////////////////////////////////////////
1035/// minimize estimator / train network with back propagation algorithm
1036
1038{
1039 // Timer timer( nEpochs, GetName() );
1040 Timer timer( (fSteps>0?100:nEpochs), GetName() );
1041 Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1042
1043 // create histograms for overtraining monitoring
1044 Int_t nbinTest = Int_t(nEpochs/fTestRate);
1045 if(!IsSilentFile())
1046 {
1047 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1048 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1049 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1050 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1051 }
1052 if(fSamplingTraining || fSamplingTesting)
1053 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1054
1055 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1056 timer.DrawProgressBar(0);
1057
1058 // estimators
1059 Double_t trainE = -1;
1060 Double_t testE = -1;
1061
1062 // start training cycles (epochs)
1063 for (Int_t i = 0; i < nEpochs; i++) {
1064
1065 if (fExitFromTraining) break;
1066 fIPyCurrentIter = i;
1067 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1068 if ((i+1)%fTestRate == 0 || (i == 0)) {
1069 if (fSamplingTraining) {
1070 Data()->SetCurrentType( Types::kTraining );
1071 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1072 Data()->CreateSampling();
1073 }
1074 if (fSamplingTesting) {
1075 Data()->SetCurrentType( Types::kTesting );
1076 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1077 Data()->CreateSampling();
1078 }
1079 }
1080 }
1081 else {
1082 Data()->SetCurrentType( Types::kTraining );
1083 Data()->InitSampling(1.0,1.0);
1084 Data()->SetCurrentType( Types::kTesting );
1085 Data()->InitSampling(1.0,1.0);
1086 }
1087 Data()->SetCurrentType( Types::kTraining );
1088
1089 TrainOneEpoch();
1090 DecaySynapseWeights(i >= lateEpoch);
1091
1092 // monitor convergence of training and control sample
1093 if ((i+1)%fTestRate == 0) {
1094 trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1095 testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1096 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1097 if(!IsSilentFile())
1098 {
1099 fEstimatorHistTrain->Fill( i+1, trainE );
1100 fEstimatorHistTest ->Fill( i+1, testE );
1101 }
1102 Bool_t success = kFALSE;
1103 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1104 success = kTRUE;
1105 }
1106 Data()->EventResult( success );
1107
1108 SetCurrentValue( testE );
1109 if (HasConverged()) {
1110 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1111 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1112 i = newEpoch;
1113 ResetConvergenceCounter();
1114 }
1115 else {
1116 if (lateEpoch > i) lateEpoch = i;
1117 else break;
1118 }
1119 }
1120 }
1121
1122 // draw progress bar (add convergence value)
1123 TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1124 if (fSteps > 0) {
1125 Float_t progress = 0;
1126 if (Float_t(i)/nEpochs < fSamplingEpoch)
1127 progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1128 else
1129 progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1130
1131 timer.DrawProgressBar( Int_t(progress), convText );
1132 }
1133 else {
1134 timer.DrawProgressBar( i, convText );
1135 }
1136 }
1137}
1138
1139////////////////////////////////////////////////////////////////////////////////
1140/// train network over a single epoch/cycle of events
1141
1143{
1144 Int_t nEvents = Data()->GetNEvents();
1145
1146 // randomize the order events will be presented, important for sequential mode
1147 Int_t* index = new Int_t[nEvents];
1148 for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1149 Shuffle(index, nEvents);
1150
1151 // loop over all training events
1152 for (Int_t i = 0; i < nEvents; i++) {
1153
1154 const Event * ev = GetEvent(index[i]);
1155 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1156 && (Data()->GetCurrentType() == Types::kTraining)){
1157 continue;
1158 }
1159
1160 TrainOneEvent(index[i]);
1161
1162 // do adjustments if in batch mode
1163 if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1164 AdjustSynapseWeights();
1165 if (fgPRINT_BATCH) {
1166 PrintNetwork();
1167 WaitForKeyboard();
1168 }
1169 }
1170
1171 // debug in sequential mode
1172 if (fgPRINT_SEQ) {
1173 PrintNetwork();
1174 WaitForKeyboard();
1175 }
1176 }
1177
1178 delete[] index;
1179}
1180
1181////////////////////////////////////////////////////////////////////////////////
1182/// Input:
1183/// - index: the array to shuffle
1184/// - n: the size of the array
1185/// Output:
1186/// - index: the shuffled indexes
1187///
1188/// This method is used for sequential training
1189
1191{
1192 Int_t j, k;
1193 Int_t a = n - 1;
1194 for (Int_t i = 0; i < n; i++) {
1195 j = (Int_t) (frgen->Rndm() * a);
1196 if (j<n){ // address the 'worries' of coverity
1197 k = index[j];
1198 index[j] = index[i];
1199 index[i] = k;
1200 }
1201 }
1202}
1203
1204////////////////////////////////////////////////////////////////////////////////
1205/// decay synapse weights
1206/// in last 10 epochs, lower learning rate even more to find a good minimum
1207
1209{
1210 TSynapse* synapse;
1211 Int_t numSynapses = fSynapses->GetEntriesFast();
1212 for (Int_t i = 0; i < numSynapses; i++) {
1213 synapse = (TSynapse*)fSynapses->At(i);
1214 if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1215 else synapse->DecayLearningRate(fDecayRate);
1216 }
1217}
1218
1219////////////////////////////////////////////////////////////////////////////////
1220/// fast per-event training
1221
1223{
1224 GetEvent(ievt);
1225
1226 // as soon as we know how to get event weights, get that here
1227
1228 // note: the normalization of event weights will affect the choice
1229 // of learning rate, one will have to experiment to get the right value.
1230 // in general, if the "average" event weight is 1, the learning rate
1231 // should be good if set around 0.02 (a good value if all event weights are 1)
1232 Double_t eventWeight = 1.0;
1233
1234 // get the desired output of this event
1235 Double_t desired;
1236 if (type == 0) desired = fOutput->GetMin(); // background //zjh
1237 else desired = fOutput->GetMax(); // signal //zjh
1238
1239 // force the value for each input neuron
1240 Double_t x;
1241 TNeuron* neuron;
1242
1243 for (UInt_t j = 0; j < GetNvar(); j++) {
1244 x = branchVar[j];
1245 if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1246 neuron = GetInputNeuron(j);
1247 neuron->ForceValue(x);
1248 }
1249
1250 ForceNetworkCalculations();
1251 UpdateNetwork(desired, eventWeight);
1252}
1253
1254////////////////////////////////////////////////////////////////////////////////
1255/// train network over a single event
1256/// this uses the new event model
1257
1259{
1260 // note: the normalization of event weights will affect the choice
1261 // of learning rate, one will have to experiment to get the right value.
1262 // in general, if the "average" event weight is 1, the learning rate
1263 // should be good if set around 0.02 (a good value if all event weights are 1)
1264
1265 const Event * ev = GetEvent(ievt);
1266 Double_t eventWeight = ev->GetWeight();
1267 ForceNetworkInputs( ev );
1268 ForceNetworkCalculations();
1269 if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1270 if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1271 else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1272}
1273
1274////////////////////////////////////////////////////////////////////////////////
1275/// get the desired output of this event
1276
1278{
1279 return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1280}
1281
1282////////////////////////////////////////////////////////////////////////////////
1283/// update the network based on how closely
1284/// the output matched the desired output
1285
1287{
1288 Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1289 if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1290 else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1291 else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1292 error *= eventWeight;
1293 GetOutputNeuron()->SetError(error);
1294 CalculateNeuronDeltas();
1295 UpdateSynapses();
1296}
1297
1298////////////////////////////////////////////////////////////////////////////////
1299/// update the network based on how closely
1300/// the output matched the desired output
1301
1302void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1303{
1304 // Norm for softmax
1305 Double_t norm = 0.;
1306 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1307 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1308 norm += TMath::Exp(act);
1309 }
1310
1311 // Get output of network, and apply softmax
1312 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1313 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1314 Double_t output = TMath::Exp(act) / norm;
1315 Double_t error = output - desired.at(i);
1316 error *= eventWeight;
1317 GetOutputNeuron(i)->SetError(error);
1318 }
1319
1320 // Do backpropagation
1321 CalculateNeuronDeltas();
1322 UpdateSynapses();
1323}
1324
1325////////////////////////////////////////////////////////////////////////////////
1326/// have each neuron calculate its delta by back propagation
1327
1329{
1330 TNeuron* neuron;
1331 Int_t numNeurons;
1332 Int_t numLayers = fNetwork->GetEntriesFast();
1333 TObjArray* curLayer;
1334
1335 // step backwards through the network (back propagation)
1336 // deltas calculated starting at output layer
1337 for (Int_t i = numLayers-1; i >= 0; i--) {
1338 curLayer = (TObjArray*)fNetwork->At(i);
1339 numNeurons = curLayer->GetEntriesFast();
1340
1341 for (Int_t j = 0; j < numNeurons; j++) {
1342 neuron = (TNeuron*) curLayer->At(j);
1343 neuron->CalculateDelta();
1344 }
1345 }
1346}
1347
1348////////////////////////////////////////////////////////////////////////////////
1349/// create genetics class similar to GeneticCut
1350/// give it vector of parameter ranges (parameters = weights)
1351/// link fitness function of this class to ComputeEstimator
1352/// instantiate GA (see MethodCuts)
1353/// run it
1354/// then this should exist for GA, Minuit and random sampling
1355
1357{
1358 PrintMessage("Minimizing Estimator with GA");
1359
1360 // define GA parameters
1361 fGA_preCalc = 1;
1362 fGA_SC_steps = 10;
1363 fGA_SC_rate = 5;
1364 fGA_SC_factor = 0.95;
1365 fGA_nsteps = 30;
1366
1367 // ranges
1368 std::vector<Interval*> ranges;
1369
1370 Int_t numWeights = fSynapses->GetEntriesFast();
1371 for (Int_t ivar=0; ivar< numWeights; ivar++) {
1372 ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1373 }
1374
1375 FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1376 gf->Run();
1377
1378 Double_t estimator = CalculateEstimator();
1379 Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1380}
1381
1382////////////////////////////////////////////////////////////////////////////////
1383/// interface to the estimate
1384
1385Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1386{
1387 return ComputeEstimator( parameters );
1388}
1389
1390////////////////////////////////////////////////////////////////////////////////
1391/// this function is called by GeneticANN for GA optimization
1392
1393Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1394{
1395 TSynapse* synapse;
1396 Int_t numSynapses = fSynapses->GetEntriesFast();
1397
1398 for (Int_t i = 0; i < numSynapses; i++) {
1399 synapse = (TSynapse*)fSynapses->At(i);
1400 synapse->SetWeight(parameters.at(i));
1401 }
1402 if (fUseRegulator) UpdatePriors(); //zjh
1403
1404 Double_t estimator = CalculateEstimator();
1405
1406 return estimator;
1407}
1408
1409////////////////////////////////////////////////////////////////////////////////
1410/// update synapse error fields and adjust the weights (if in sequential mode)
1411
1413{
1414 TNeuron* neuron;
1415 Int_t numNeurons;
1416 TObjArray* curLayer;
1417 Int_t numLayers = fNetwork->GetEntriesFast();
1418
1419 for (Int_t i = 0; i < numLayers; i++) {
1420 curLayer = (TObjArray*)fNetwork->At(i);
1421 numNeurons = curLayer->GetEntriesFast();
1422
1423 for (Int_t j = 0; j < numNeurons; j++) {
1424 neuron = (TNeuron*) curLayer->At(j);
1425 if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1426 else neuron->UpdateSynapsesSequential();
1427 }
1428 }
1429}
1430
1431////////////////////////////////////////////////////////////////////////////////
1432/// just adjust the synapse weights (should be called in batch mode)
1433
1435{
1436 TNeuron* neuron;
1437 Int_t numNeurons;
1438 TObjArray* curLayer;
1439 Int_t numLayers = fNetwork->GetEntriesFast();
1440
1441 for (Int_t i = numLayers-1; i >= 0; i--) {
1442 curLayer = (TObjArray*)fNetwork->At(i);
1443 numNeurons = curLayer->GetEntriesFast();
1444
1445 for (Int_t j = 0; j < numNeurons; j++) {
1446 neuron = (TNeuron*) curLayer->At(j);
1447 neuron->AdjustSynapseWeights();
1448 }
1449 }
1450}
1451
1452////////////////////////////////////////////////////////////////////////////////
1453
1455{
1456 fPrior=0;
1457 fPriorDev.clear();
1458 Int_t nSynapses = fSynapses->GetEntriesFast();
1459 for (Int_t i=0;i<nSynapses;i++) {
1460 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1461 fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1462 fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1463 }
1464}
1465
1466////////////////////////////////////////////////////////////////////////////////
1467
1469{
1470 TMatrixD InvH(0,0);
1471 GetApproxInvHessian(InvH);
1472 Int_t numSynapses=fSynapses->GetEntriesFast();
1473 Int_t numRegulators=fRegulators.size();
1474 Float_t gamma=0,
1475 variance=1.; // Gaussian noise
1476 std::vector<Int_t> nWDP(numRegulators);
1477 std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1478 for (int i=0;i<numSynapses;i++) {
1479 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1480 Int_t idx=fRegulatorIdx[i];
1481 nWDP[idx]++;
1482 trace[idx]+=InvH[i][i];
1483 gamma+=1-fRegulators[idx]*InvH[i][i];
1484 weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1485 }
1486 if (fEstimator==kMSE) {
1487 if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1488 else variance=CalculateEstimator( Types::kTraining, 0 );
1489 }
1490
1491 //Log() << kDEBUG << Endl;
1492 for (int i=0;i<numRegulators;i++)
1493 {
1494 //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1495 fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1496 if (fRegulators[i]<0) fRegulators[i]=0;
1497 Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1498 }
1499 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1500 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1501
1502 Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1503
1504}
1505
1506////////////////////////////////////////////////////////////////////////////////
1507
1508void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1509{
1510 Int_t numSynapses=fSynapses->GetEntriesFast();
1511 InvHessian.ResizeTo( numSynapses, numSynapses );
1512 InvHessian=0;
1513 TMatrixD sens(numSynapses,1);
1514 TMatrixD sensT(1,numSynapses);
1515 Int_t nEvents = GetNEvents();
1516 for (Int_t i=0;i<nEvents;i++) {
1517 GetEvent(i);
1518 double outputValue=GetMvaValue(); // force calculation
1519 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1520 CalculateNeuronDeltas();
1521 for (Int_t j = 0; j < numSynapses; j++){
1522 TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1523 synapses->InitDelta();
1524 synapses->CalculateDelta();
1525 sens[j][0]=sensT[0][j]=synapses->GetDelta();
1526 }
1527 if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1528 else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1529 }
1530
1531 // TVectorD eValue(numSynapses);
1532 if (regulate) {
1533 for (Int_t i = 0; i < numSynapses; i++){
1534 InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1535 }
1536 }
1537 else {
1538 for (Int_t i = 0; i < numSynapses; i++){
1539 InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1540 }
1541 }
1542
1543 InvHessian.Invert();
1544
1545}
1546
1547////////////////////////////////////////////////////////////////////////////////
1548
1550{
1551 Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1552
1553 // no hessian (old training file) or no error requested
1554 if (!fCalculateErrors || errLower==0 || errUpper==0)
1555 return MvaValue;
1556
1557 Double_t MvaUpper,MvaLower,median,variance;
1558 Int_t numSynapses=fSynapses->GetEntriesFast();
1559 if (fInvHessian.GetNcols()!=numSynapses) {
1560 Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1561 }
1562 TMatrixD sens(numSynapses,1);
1563 TMatrixD sensT(1,numSynapses);
1564 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1565 //GetOutputNeuron()->SetError(1.);
1566 CalculateNeuronDeltas();
1567 for (Int_t i = 0; i < numSynapses; i++){
1568 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1569 synapses->InitDelta();
1570 synapses->CalculateDelta();
1571 sensT[0][i]=synapses->GetDelta();
1572 }
1573 sens.Transpose(sensT);
1574 TMatrixD sig=sensT*fInvHessian*sens;
1575 variance=sig[0][0];
1576 median=GetOutputNeuron()->GetValue();
1577
1578 if (variance<0) {
1579 Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1580 variance=0;
1581 }
1582 variance=sqrt(variance);
1583
1584 //upper
1585 MvaUpper=fOutput->Eval(median+variance);
1586 if(errUpper)
1587 *errUpper=MvaUpper-MvaValue;
1588
1589 //lower
1590 MvaLower=fOutput->Eval(median-variance);
1591 if(errLower)
1592 *errLower=MvaValue-MvaLower;
1593
1594 return MvaValue;
1595}
1596
1597
1598#ifdef MethodMLP_UseMinuit__
1599
1600////////////////////////////////////////////////////////////////////////////////
1601/// minimize using Minuit
1602
1603void TMVA::MethodMLP::MinuitMinimize()
1604{
1605 fNumberOfWeights = fSynapses->GetEntriesFast();
1606
1607 TFitter* tfitter = new TFitter( fNumberOfWeights );
1608
1609 // minuit-specific settings
1610 Double_t args[10];
1611
1612 // output level
1613 args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1614 tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1615 tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1616
1617 double w[54];
1618
1619 // init parameters
1620 for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1621 TString parName = Form("w%i", ipar);
1622 tfitter->SetParameter( ipar,
1623 parName, w[ipar], 0.1, 0, 0 );
1624 }
1625
1626 // define the CFN function
1627 tfitter->SetFCN( &IFCN );
1628
1629 // define fit strategy
1630 args[0] = 2;
1631 tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1632
1633 // now do the fit !
1634 args[0] = 1e-04;
1635 tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1636
1637 Bool_t doBetter = kFALSE;
1638 Bool_t doEvenBetter = kFALSE;
1639 if (doBetter) {
1640 args[0] = 1e-04;
1641 tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1642
1643 if (doEvenBetter) {
1644 args[0] = 500;
1645 tfitter->ExecuteCommand( "MINOS", args, 1 );
1646 }
1647 }
1648}
1649
1650////////////////////////////////////////////////////////////////////////////////
1651/// Evaluate the minimisation function
1652///
1653/// Input parameters:
1654/// - npars: number of currently variable parameters
1655/// CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1656/// - fitPars: array of (constant and variable) parameters
1657/// - iflag: indicates what is to be calculated (see example below)
1658/// - grad: array of gradients
1659///
1660/// Output parameters:
1661/// - f: the calculated function value.
1662/// - grad: the (optional) vector of first derivatives).
1663
1664void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1665{
1666 ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1667}
1668
1669TTHREAD_TLS(Int_t) nc = 0;
1670TTHREAD_TLS(double) minf = 1000000;
1671
1672void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1673{
1674 // first update the weights
1675 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1676 TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1677 synapse->SetWeight(fitPars[ipar]);
1678 }
1679
1680 // now compute the estimator
1681 f = CalculateEstimator();
1682
1683 nc++;
1684 if (f < minf) minf = f;
1685 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1686 Log() << kDEBUG << Endl;
1687 Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1688}
1689
1690////////////////////////////////////////////////////////////////////////////////
1691/// global "this" pointer to be used in minuit
1692
1693TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1694{
1695 return fgThis;
1696}
1697
1698#endif
1699
1700
1701////////////////////////////////////////////////////////////////////////////////
1702/// write specific classifier response
1703
1704void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1705{
1706 MethodANNBase::MakeClassSpecific(fout, className);
1707}
1708
1709////////////////////////////////////////////////////////////////////////////////
1710/// get help message text
1711///
1712/// typical length of text line:
1713/// "|--------------------------------------------------------------|"
1714
1716{
1717 TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1718 TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1719
1720 Log() << Endl;
1721 Log() << col << "--- Short description:" << colres << Endl;
1722 Log() << Endl;
1723 Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1724 Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1725 Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1726 Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1727 Log() << "signal and one background). " << Endl;
1728 Log() << Endl;
1729 Log() << col << "--- Performance optimisation:" << colres << Endl;
1730 Log() << Endl;
1731 Log() << "Neural networks are stable and performing for a large variety of " << Endl;
1732 Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1733 Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1734 Log() << "number of input variables that have only little discrimination power. " << Endl;
1735 Log() << "" << Endl;
1736 Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1737 Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1738 Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1739 Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1740 Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1741 Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1742 Log() << "competitive results)." << Endl;
1743 Log() << Endl;
1744 Log() << col << "Overtraining: " << colres
1745 << "only the TMlpANN performs an explicit separation of the" << Endl;
1746 Log() << "full training sample into independent training and validation samples." << Endl;
1747 Log() << "We have found that in most high-energy physics applications the " << Endl;
1748 Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1749 Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1750 Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1751 Log() << "the use of validation samples would only reduce the available training" << Endl;
1752 Log() << "information. However, if the performance on the training sample is " << Endl;
1753 Log() << "found to be significantly better than the one found with the inde-" << Endl;
1754 Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1755 Log() << "are printed to standard output at the end of each training job." << Endl;
1756 Log() << Endl;
1757 Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1758 Log() << Endl;
1759 Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1760 Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
1761 Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1762 Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1763 Log() << "in favour of more neurons in the first hidden layer." << Endl;
1764 Log() << "" << Endl;
1765 Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1766 Log() << "adjustable weights is small compared to the training sample size," << Endl;
1767 Log() << "using a large number of training samples should not lead to overtraining." << Endl;
1768}
1769
#define REGISTER_METHOD(CLASS)
for example
#define d(i)
Definition: RSha256.hxx:102
#define f(i)
Definition: RSha256.hxx:104
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:45
unsigned int UInt_t
Definition: RtypesCore.h:46
const Bool_t kFALSE
Definition: RtypesCore.h:101
bool Bool_t
Definition: RtypesCore.h:63
double Double_t
Definition: RtypesCore.h:59
float Float_t
Definition: RtypesCore.h:57
const Bool_t kTRUE
Definition: RtypesCore.h:100
#define ClassImp(name)
Definition: Rtypes.h:364
char name[80]
Definition: TGX11.cxx:110
int type
Definition: TGX11.cxx:121
double sqrt(double)
double exp(double)
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
char * Form(const char *fmt,...)
The ROOT standard fitter based on TMinuit.
Definition: TFitter.h:19
virtual Int_t SetParameter(Int_t ipar, const char *parname, Double_t value, Double_t verr, Double_t vlow, Double_t vhigh)
set initial values for a parameter
Definition: TFitter.cxx:596
virtual Int_t ExecuteCommand(const char *command, Double_t *args, Int_t nargs)
Execute a fitter command; command : command string args : list of nargs command arguments.
Definition: TFitter.cxx:92
virtual void SetFCN(void(*fcn)(Int_t &, Double_t *, Double_t &f, Double_t *, Int_t))
Specify the address of the fitting algorithm.
Definition: TFitter.cxx:564
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
TH1 is the base class of all histogram classes in ROOT.
Definition: TH1.h:58
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3350
Bool_t WriteOptionsReference() const
Definition: Config.h:67
Class that contains all the data information.
Definition: DataSetInfo.h:62
std::vector< Float_t > & GetTargets()
Definition: Event.h:103
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:381
UInt_t GetClass() const
Definition: Event.h:86
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
Base class for TMVA fitters.
Definition: FitterBase.h:51
Double_t Run()
estimator function interface for fitting
Definition: FitterBase.cxx:74
Fitter using a Genetic Algorithm.
Definition: GeneticFitter.h:44
The TMVA::Interval Class.
Definition: Interval.h:61
Base class for all TMVA methods using artificial neural networks.
Definition: MethodANNBase.h:62
virtual void ProcessOptions()
do nothing specific at this moment
virtual void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
virtual Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
get the mva value generated by the NN
Multilayer Perceptron class built off of MethodANNBase.
Definition: MethodMLP.h:69
Bool_t LineSearch(TMatrixD &Dir, std::vector< Double_t > &Buffer, Double_t *dError=0)
Definition: MethodMLP.cxx:840
void GetHelpMessage() const
get help message text
Definition: MethodMLP.cxx:1715
void BackPropagationMinimize(Int_t nEpochs)
minimize estimator / train network with back propagation algorithm
Definition: MethodMLP.cxx:1037
Double_t GetMSEErr(const Event *ev, UInt_t index=0)
Definition: MethodMLP.cxx:1003
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
get the mva value generated by the NN
Definition: MethodMLP.cxx:1549
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Definition: MethodMLP.cxx:1704
void AdjustSynapseWeights()
just adjust the synapse weights (should be called in batch mode)
Definition: MethodMLP.cxx:1434
void SteepestDir(TMatrixD &Dir)
Definition: MethodMLP.cxx:774
void TrainOneEpoch()
train network over a single epoch/cycle of events
Definition: MethodMLP.cxx:1142
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
MLP can handle classification with 2 classes and regression with one regression-target.
Definition: MethodMLP.cxx:152
Bool_t GetHessian(TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta)
Definition: MethodMLP.cxx:787
Double_t ComputeEstimator(std::vector< Double_t > &parameters)
this function is called by GeneticANN for GA optimization
Definition: MethodMLP.cxx:1393
void InitializeLearningRates()
initialize learning rates of synapses, used only by back propagation
Definition: MethodMLP.cxx:278
void CalculateNeuronDeltas()
have each neuron calculate its delta by back propagation
Definition: MethodMLP.cxx:1328
Double_t DerivDir(TMatrixD &Dir)
Definition: MethodMLP.cxx:825
Double_t GetCEErr(const Event *ev, UInt_t index=0)
Definition: MethodMLP.cxx:1020
virtual ~MethodMLP()
destructor nothing to be done
Definition: MethodMLP.cxx:138
void SetDir(TMatrixD &Hessian, TMatrixD &Dir)
Definition: MethodMLP.cxx:808
void Shuffle(Int_t *index, Int_t n)
Input:
Definition: MethodMLP.cxx:1190
void SimulateEvent(const Event *ev)
Definition: MethodMLP.cxx:734
void SetDirWeights(std::vector< Double_t > &Origin, TMatrixD &Dir, Double_t alpha)
Definition: MethodMLP.cxx:950
void SetGammaDelta(TMatrixD &Gamma, TMatrixD &Delta, std::vector< Double_t > &Buffer)
Definition: MethodMLP.cxx:672
Double_t EstimatorFunction(std::vector< Double_t > &parameters)
interface to the estimate
Definition: MethodMLP.cxx:1385
void GetApproxInvHessian(TMatrixD &InvHessian, bool regulate=true)
Definition: MethodMLP.cxx:1508
void BFGSMinimize(Int_t nEpochs)
train network with BFGS algorithm
Definition: MethodMLP.cxx:487
void UpdateSynapses()
update synapse error fields and adjust the weights (if in sequential mode)
Definition: MethodMLP.cxx:1412
void Init()
default initializations
Definition: MethodMLP.cxx:164
void ProcessOptions()
process user options
Definition: MethodMLP.cxx:247
void TrainOneEvent(Int_t ievt)
train network over a single event this uses the new event model
Definition: MethodMLP.cxx:1258
Double_t GetDesiredOutput(const Event *ev)
get the desired output of this event
Definition: MethodMLP.cxx:1277
void GeneticMinimize()
create genetics class similar to GeneticCut give it vector of parameter ranges (parameters = weights)...
Definition: MethodMLP.cxx:1356
Double_t GetError()
Definition: MethodMLP.cxx:966
void DecaySynapseWeights(Bool_t lateEpoch)
decay synapse weights in last 10 epochs, lower learning rate even more to find a good minimum
Definition: MethodMLP.cxx:1208
void TrainOneEventFast(Int_t ievt, Float_t *&branchVar, Int_t &type)
fast per-event training
Definition: MethodMLP.cxx:1222
void ComputeDEDw()
Definition: MethodMLP.cxx:697
void UpdateNetwork(Double_t desired, Double_t eventWeight=1.0)
update the network based on how closely the output matched the desired output
Definition: MethodMLP.cxx:1286
MethodMLP(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor
Definition: MethodMLP.cxx:90
void UpdateRegulators()
Definition: MethodMLP.cxx:1468
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodMLP.cxx:195
Double_t CalculateEstimator(Types::ETreeType treeType=Types::kTraining, Int_t iEpoch=-1)
calculate the estimator that training is attempting to minimize
Definition: MethodMLP.cxx:292
Neuron class used by TMVA artificial neural network methods.
Definition: TNeuron.h:49
void AdjustSynapseWeights()
adjust the pre-synapses' weights for each neuron (input neuron has no pre-synapse) this method should...
Definition: TNeuron.cxx:263
void ForceValue(Double_t value)
force the value, typically for input and bias neurons
Definition: TNeuron.cxx:84
void UpdateSynapsesSequential()
update the pre-synapses for each neuron (input neuron has no pre-synapse) this method should only be ...
Definition: TNeuron.cxx:242
void UpdateSynapsesBatch()
update and adjust the pre-synapses for each neuron (input neuron has no pre-synapse) this method shou...
Definition: TNeuron.cxx:224
void CalculateDelta()
calculate error field
Definition: TNeuron.cxx:115
Synapse class used by TMVA artificial neural network methods.
Definition: TSynapse.h:42
void SetWeight(Double_t weight)
set synapse weight
Definition: TSynapse.cxx:68
Double_t GetWeight()
Definition: TSynapse.h:53
void SetDEDw(Double_t DEDw)
Definition: TSynapse.h:85
void SetLearningRate(Double_t rate)
Definition: TSynapse.h:56
Double_t GetDEDw()
Definition: TSynapse.h:86
void InitDelta()
Definition: TSynapse.h:83
Double_t GetDelta()
Definition: TSynapse.h:87
void DecayLearningRate(Double_t rate)
Definition: TSynapse.h:62
void CalculateDelta()
calculate/adjust the error field for this synapse
Definition: TSynapse.cxx:108
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:202
Double_t NormVariable(Double_t x, Double_t xmin, Double_t xmax)
normalise to output range: [-1, 1]
Definition: Tools.cxx:122
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
Singleton class for Global types used by TMVA.
Definition: Types.h:73
EAnalysisType
Definition: Types.h:128
@ kMulticlass
Definition: Types.h:131
@ kClassification
Definition: Types.h:129
@ kRegression
Definition: Types.h:130
@ kTraining
Definition: Types.h:145
@ kTesting
Definition: Types.h:146
@ kDEBUG
Definition: Types.h:58
@ kHEADER
Definition: Types.h:65
@ kINFO
Definition: Types.h:60
@ kWARNING
Definition: Types.h:61
@ kFATAL
Definition: Types.h:63
virtual TMatrixTBase< Element > & UnitMatrix()
Make a unit matrix (matrix need not be a square one).
virtual TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1)
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
Definition: TMatrixT.cxx:1211
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
Definition: TMatrixT.cxx:1397
TMatrixT< Element > & Transpose(const TMatrixT< Element > &source)
Transpose matrix source.
Definition: TMatrixT.cxx:1470
An array of TObjects.
Definition: TObjArray.h:37
Int_t GetEntriesFast() const
Definition: TObjArray.h:64
TObject * At(Int_t idx) const
Definition: TObjArray.h:166
Basic string class.
Definition: TString.h:136
This is a simple weighted bidirectional connection between two neurons.
Definition: TSynapse.h:20
void SetWeight(Double_t w)
Sets the weight of the synapse.
Definition: TSynapse.cxx:102
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
double gamma(double x)
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Config & gConfig()
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Exp(Double_t x)
Definition: TMath.h:727
Double_t Log(Double_t x)
Definition: TMath.h:760
Double_t Sqrt(Double_t x)
Definition: TMath.h:691
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Definition: TMath.cxx:348
auto * a
Definition: textangle.C:12
static void output(int code)
Definition: gifencode.c:226