76#ifdef MethodMLP_UseMinuit__
95 fUseRegulator(false), fCalculateErrors(false),
96 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
97 fTrainingMethod(kBFGS), fTrainMethodS(
"BFGS"),
98 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
99 fSamplingTraining(false), fSamplingTesting(false),
100 fLastAlpha(0.0), fTau(0.),
101 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
102 fBPMode(kSequential), fBpModeS(
"None"),
103 fBatchSize(0), fTestRate(0), fEpochMon(false),
104 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
105 fGA_SC_rate(0), fGA_SC_factor(0.0),
106 fDeviationsFromTargets(0),
118 fUseRegulator(false), fCalculateErrors(false),
119 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
120 fTrainingMethod(kBFGS), fTrainMethodS(
"BFGS"),
121 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
122 fSamplingTraining(false), fSamplingTesting(false),
123 fLastAlpha(0.0), fTau(0.),
124 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
125 fBPMode(kSequential), fBpModeS(
"None"),
126 fBatchSize(0), fTestRate(0), fEpochMon(false),
127 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
128 fGA_SC_rate(0), fGA_SC_factor(0.0),
129 fDeviationsFromTargets(0),
167 SetSignalReferenceCut( 0.5 );
168#ifdef MethodMLP_UseMinuit__
197 DeclareOptionRef(fTrainMethodS=
"BP",
"TrainingMethod",
198 "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
203 DeclareOptionRef(fLearnRate=0.02,
"LearningRate",
"ANN learning rate parameter");
204 DeclareOptionRef(fDecayRate=0.01,
"DecayRate",
"Decay rate for learning parameter");
205 DeclareOptionRef(fTestRate =10,
"TestRate",
"Test for overtraining performed at each #th epochs");
206 DeclareOptionRef(fEpochMon =
kFALSE,
"EpochMonitoring",
"Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
208 DeclareOptionRef(fSamplingFraction=1.0,
"Sampling",
"Only 'Sampling' (randomly selected) events are trained each epoch");
209 DeclareOptionRef(fSamplingEpoch=1.0,
"SamplingEpoch",
"Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
210 DeclareOptionRef(fSamplingWeight=1.0,
"SamplingImportance",
" The sampling weights of events in epochs which successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
212 DeclareOptionRef(fSamplingTraining=
kTRUE,
"SamplingTraining",
"The training sample is sampled");
213 DeclareOptionRef(fSamplingTesting=
kFALSE,
"SamplingTesting" ,
"The testing sample is sampled");
215 DeclareOptionRef(fResetStep=50,
"ResetStep",
"How often BFGS should reset history");
216 DeclareOptionRef(fTau =3.0,
"Tau",
"LineSearch \"size step\"");
218 DeclareOptionRef(fBpModeS=
"sequential",
"BPMode",
219 "Back-propagation learning mode: sequential or batch");
220 AddPreDefVal(
TString(
"sequential"));
221 AddPreDefVal(
TString(
"batch"));
223 DeclareOptionRef(fBatchSize=-1,
"BatchSize",
224 "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
226 DeclareOptionRef(fImprovement=1
e-30,
"ConvergenceImprove",
227 "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
229 DeclareOptionRef(fSteps=-1,
"ConvergenceTests",
230 "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
232 DeclareOptionRef(fUseRegulator=
kFALSE,
"UseRegulator",
233 "Use regulator to avoid over-training");
234 DeclareOptionRef(fUpdateLimit=10000,
"UpdateLimit",
235 "Maximum times of regulator update");
236 DeclareOptionRef(fCalculateErrors=
kFALSE,
"CalculateErrors",
237 "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value");
239 DeclareOptionRef(fWeightRange=1.0,
"WeightRange",
240 "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
252 if (IgnoreEventsWithNegWeightsInTraining()) {
254 <<
"Will ignore negative events in training!"
259 if (fTrainMethodS ==
"BP" ) fTrainingMethod = kBP;
260 else if (fTrainMethodS ==
"BFGS") fTrainingMethod = kBFGS;
261 else if (fTrainMethodS ==
"GA" ) fTrainingMethod = kGA;
263 if (fBpModeS ==
"sequential") fBPMode = kSequential;
264 else if (fBpModeS ==
"batch") fBPMode = kBatch;
268 if (fBPMode == kBatch) {
270 Int_t numEvents = Data()->GetNEvents();
271 if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
280 Log() << kDEBUG <<
"Initialize learning rates" <<
Endl;
282 Int_t numSynapses = fSynapses->GetEntriesFast();
283 for (
Int_t i = 0; i < numSynapses; i++) {
284 synapse = (
TSynapse*)fSynapses->At(i);
296 Log() << kFATAL <<
"<CalculateEstimator> fatal error: wrong tree type: " << treeType <<
Endl;
300 Data()->SetCurrentType(treeType);
311 if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
312 histS =
new TH1F( nameS, nameS, nbin, -limit, limit );
313 histB =
new TH1F( nameB, nameB, nbin, -limit, limit );
319 Int_t nEvents = GetNEvents();
320 UInt_t nClasses = DataInfo().GetNClasses();
321 UInt_t nTgts = DataInfo().GetNTargets();
325 if( fWeightRange < 1.f ){
326 fDeviationsFromTargets =
new std::vector<std::pair<Float_t,Float_t> >(nEvents);
329 for (
Int_t i = 0; i < nEvents; i++) {
331 const Event* ev = GetEvent(i);
333 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
340 ForceNetworkInputs( ev );
341 ForceNetworkCalculations();
344 if (DoRegression()) {
345 for (
UInt_t itgt = 0; itgt < nTgts; itgt++) {
346 v = GetOutputNeuron( itgt )->GetActivationValue();
352 }
else if (DoMulticlass() ) {
354 if (fEstimator==kCE){
356 for (
UInt_t icls = 0; icls < nClasses; icls++) {
357 Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
358 norm +=
exp( activationValue );
360 d =
exp( activationValue );
365 for (
UInt_t icls = 0; icls < nClasses; icls++) {
366 Double_t desired = (icls==cls) ? 1.0 : 0.0;
367 v = GetOutputNeuron( icls )->GetActivationValue();
368 d = (desired-
v)*(desired-
v);
373 Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
374 v = GetOutputNeuron()->GetActivationValue();
375 if (fEstimator==kMSE)
d = (desired-
v)*(desired-
v);
380 if( fDeviationsFromTargets )
381 fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(
d,w));
387 if (DataInfo().IsSignal(ev) && histS != 0) histS->
Fill(
float(
v), float(w) );
388 else if (histB != 0) histB->
Fill(
float(
v), float(w) );
392 if( fDeviationsFromTargets ) {
393 std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
395 Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
398 Float_t weightRangeCut = fWeightRange*sumOfWeights;
400 for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
401 float deviation = (*itDev).first;
402 float devWeight = (*itDev).second;
403 weightSum += devWeight;
404 if( weightSum <= weightRangeCut ) {
405 estimator += devWeight*deviation;
409 sumOfWeights = sumOfWeightsInRange;
410 delete fDeviationsFromTargets;
413 if (histS != 0) fEpochMonHistS.push_back( histS );
414 if (histB != 0) fEpochMonHistB.push_back( histB );
419 estimator = estimator/
Float_t(sumOfWeights);
424 Data()->SetCurrentType( saveType );
427 if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType ==
Types::kTraining) {
428 CreateWeightMonitoringHists(
Form(
"epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
440 Log() << kFATAL <<
"ANN Network is not initialized, doing it now!"<<
Endl;
441 SetAnalysisType(GetAnalysisType());
443 Log() << kDEBUG <<
"reinitialize learning rates" <<
Endl;
444 InitializeLearningRates();
446 PrintMessage(
"Training Network");
448 Int_t nEvents=GetNEvents();
449 Int_t nSynapses=fSynapses->GetEntriesFast();
450 if (nSynapses>nEvents)
451 Log()<<kWARNING<<
"ANN too complicated: #events="<<nEvents<<
"\t#synapses="<<nSynapses<<
Endl;
453 fIPyMaxIter = nEpochs;
454 if (fInteractive && fInteractive->NotInitialized()){
455 std::vector<TString> titles = {
"Error on training set",
"Error on test set"};
456 fInteractive->Init(titles);
459#ifdef MethodMLP_UseMinuit__
460 if (useMinuit) MinuitMinimize();
462 if (fTrainingMethod == kGA) GeneticMinimize();
463 else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
464 else BackPropagationMinimize(nEpochs);
470 Log()<<kINFO<<
"Finalizing handling of Regulator terms, trainE="<<trainE<<
" testE="<<testE<<
Endl;
472 Log()<<kINFO<<
"Done with handling of Regulator terms"<<
Endl;
475 if( fCalculateErrors || fUseRegulator )
477 Int_t numSynapses=fSynapses->GetEntriesFast();
478 fInvHessian.ResizeTo(numSynapses,numSynapses);
479 GetApproxInvHessian( fInvHessian ,
false);
489 Timer timer( (fSteps>0?100:nEpochs), GetName() );
495 fEstimatorHistTrain =
new TH1F(
"estimatorHistTrain",
"training estimator",
496 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
497 fEstimatorHistTest =
new TH1F(
"estimatorHistTest",
"test estimator",
498 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
501 Int_t nSynapses = fSynapses->GetEntriesFast();
502 Int_t nWeights = nSynapses;
504 for (
Int_t i=0;i<nSynapses;i++) {
509 std::vector<Double_t> buffer( nWeights );
510 for (
Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
513 TMatrixD Hessian ( nWeights, nWeights );
517 Int_t RegUpdateTimes=0;
525 if(fSamplingTraining || fSamplingTesting)
526 Data()->InitSampling(1.0,1.0,fRandomSeed);
528 if (fSteps > 0) Log() << kINFO <<
"Inaccurate progress timing for MLP... " <<
Endl;
532 for (
Int_t i = 0; i < nEpochs; i++) {
534 if (fExitFromTraining)
break;
536 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
537 if ((i+1)%fTestRate == 0 || (i == 0)) {
538 if (fSamplingTraining) {
540 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
541 Data()->CreateSampling();
543 if (fSamplingTesting) {
545 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
546 Data()->CreateSampling();
552 Data()->InitSampling(1.0,1.0);
554 Data()->InitSampling(1.0,1.0);
565 SetGammaDelta( Gamma, Delta, buffer );
567 if (i % fResetStep == 0 && i<0.5*nEpochs) {
573 if (GetHessian( Hessian, Gamma, Delta )) {
578 else SetDir( Hessian, Dir );
582 if (DerivDir( Dir ) > 0) {
587 if (LineSearch( Dir, buffer, &dError )) {
591 if (LineSearch(Dir, buffer, &dError)) {
593 Log() << kFATAL <<
"Line search failed! Huge troubles somewhere..." <<
Endl;
598 if (dError<0) Log()<<kWARNING<<
"\nnegative dError=" <<dError<<
Endl;
601 if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
602 Log()<<kDEBUG<<
"\n\nUpdate regulators "<<RegUpdateTimes<<
" on epoch "<<i<<
"\tdError="<<dError<<
Endl;
612 if ((i+1)%fTestRate == 0) {
617 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
620 fEstimatorHistTrain->Fill( i+1, trainE );
621 fEstimatorHistTest ->Fill( i+1, testE );
624 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1
e-100)) {
627 Data()->EventResult( success );
629 SetCurrentValue( testE );
630 if (HasConverged()) {
631 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
632 Int_t newEpoch =
Int_t(fSamplingEpoch*nEpochs);
634 ResetConvergenceCounter();
641 TString convText =
Form(
"<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i );
644 if (
Float_t(i)/nEpochs < fSamplingEpoch)
646 progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
650 progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
652 Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit;
653 if (progress2>progress) progress=progress2;
658 if (progress<i) progress=i;
674 Int_t nWeights = fSynapses->GetEntriesFast();
677 Int_t nSynapses = fSynapses->GetEntriesFast();
678 for (
Int_t i=0;i<nSynapses;i++) {
680 Gamma[IDX++][0] = -synapse->
GetDEDw();
683 for (
Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
688 for (
Int_t i=0;i<nSynapses;i++)
691 Gamma[IDX++][0] += synapse->
GetDEDw();
699 Int_t nSynapses = fSynapses->GetEntriesFast();
700 for (
Int_t i=0;i<nSynapses;i++) {
705 Int_t nEvents = GetNEvents();
706 Int_t nPosEvents = nEvents;
707 for (
Int_t i=0;i<nEvents;i++) {
709 const Event* ev = GetEvent(i);
710 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
718 for (
Int_t j=0;j<nSynapses;j++) {
724 for (
Int_t i=0;i<nSynapses;i++) {
727 if (fUseRegulator) DEDw+=fPriorDev[i];
728 synapse->
SetDEDw( DEDw / nPosEvents );
738 ForceNetworkInputs( ev );
739 ForceNetworkCalculations();
741 if (DoRegression()) {
742 UInt_t ntgt = DataInfo().GetNTargets();
743 for (
UInt_t itgt = 0; itgt < ntgt; itgt++) {
745 Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
746 GetOutputNeuron( itgt )->SetError(error);
748 }
else if (DoMulticlass()) {
749 UInt_t nClasses = DataInfo().GetNClasses();
751 for (
UInt_t icls = 0; icls < nClasses; icls++) {
752 Double_t desired = ( cls==icls ? 1.0 : 0.0 );
753 Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
754 GetOutputNeuron( icls )->SetError(error);
757 Double_t desired = GetDesiredOutput( ev );
759 if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight;
760 else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired);
761 GetOutputNeuron()->SetError(error);
764 CalculateNeuronDeltas();
765 for (
Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
777 Int_t nSynapses = fSynapses->GetEntriesFast();
779 for (
Int_t i=0;i<nSynapses;i++) {
781 Dir[IDX++][0] = -synapse->
GetDEDw();
811 Int_t nSynapses = fSynapses->GetEntriesFast();
814 for (
Int_t i=0;i<nSynapses;i++) {
816 DEDw[IDX++][0] = synapse->
GetDEDw();
819 dir = Hessian * DEDw;
820 for (
Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
828 Int_t nSynapses = fSynapses->GetEntriesFast();
831 for (
Int_t i=0;i<nSynapses;i++) {
833 Result += Dir[IDX++][0] * synapse->
GetDEDw();
843 Int_t nSynapses = fSynapses->GetEntriesFast();
844 Int_t nWeights = nSynapses;
846 std::vector<Double_t> Origin(nWeights);
847 for (
Int_t i=0;i<nSynapses;i++) {
858 if (alpha2 < 0.01) alpha2 = 0.01;
859 else if (alpha2 > 2.0) alpha2 = 2.0;
863 SetDirWeights( Origin, Dir, alpha2 );
871 for (
Int_t i=0;i<100;i++) {
873 SetDirWeights(Origin, Dir, alpha3);
885 SetDirWeights(Origin, Dir, 0.);
890 for (
Int_t i=0;i<100;i++) {
893 Log() << kWARNING <<
"linesearch, starting to investigate direction opposite of steepestDIR" <<
Endl;
894 alpha2 = -alpha_original;
896 SetDirWeights(Origin, Dir, alpha2);
906 SetDirWeights(Origin, Dir, 0.);
907 Log() << kWARNING <<
"linesearch, failed even in opposite direction of steepestDIR" <<
Endl;
913 if (alpha1>0 && alpha2>0 && alpha3 > 0) {
914 fLastAlpha = 0.5 * (alpha1 + alpha3 -
915 (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
916 - ( err2 - err1 ) / (alpha2 - alpha1 )));
922 fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
924 SetDirWeights(Origin, Dir, fLastAlpha);
931 if (finalError > err1) {
932 Log() << kWARNING <<
"Line search increased error! Something is wrong."
933 <<
"fLastAlpha=" << fLastAlpha <<
"al123=" << alpha1 <<
" "
934 << alpha2 <<
" " << alpha3 <<
" err1="<< err1 <<
" errfinal=" << finalError <<
Endl;
937 for (
Int_t i=0;i<nSynapses;i++) {
939 buffer[IDX] = synapse->
GetWeight() - Origin[IDX];
943 if (dError) (*dError)=(errOrigin-finalError)/finalError;
953 Int_t nSynapses = fSynapses->GetEntriesFast();
955 for (
Int_t i=0;i<nSynapses;i++) {
957 synapse->
SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
960 if (fUseRegulator) UpdatePriors();
968 Int_t nEvents = GetNEvents();
969 UInt_t ntgts = GetNTargets();
972 for (
Int_t i=0;i<nEvents;i++) {
973 const Event* ev = GetEvent(i);
975 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
982 if (DoRegression()) {
983 for (
UInt_t itgt = 0; itgt < ntgts; itgt++) {
984 error += GetMSEErr( ev, itgt );
986 }
else if ( DoMulticlass() ){
987 for(
UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
988 error += GetMSEErr( ev, icls );
991 if (fEstimator==kMSE) error = GetMSEErr( ev );
992 else if (fEstimator==kCE) error= GetCEErr( ev );
996 if (fUseRegulator) Result+=fPrior;
997 if (Result<0) Log()<<kWARNING<<
"\nNegative Error!!! :"<<Result-fPrior<<
"+"<<fPrior<<
Endl;
1006 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1008 if (DoRegression()) target = ev->
GetTarget( index );
1009 else if (DoMulticlass()) target = (ev->
GetClass() == index ? 1.0 : 0.0 );
1010 else target = GetDesiredOutput( ev );
1023 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1025 if (DoRegression()) target = ev->
GetTarget( index );
1026 else if (DoMulticlass()) target = (ev->
GetClass() == index ? 1.0 : 0.0 );
1027 else target = GetDesiredOutput( ev );
1040 Timer timer( (fSteps>0?100:nEpochs), GetName() );
1047 fEstimatorHistTrain =
new TH1F(
"estimatorHistTrain",
"training estimator",
1048 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
1049 fEstimatorHistTest =
new TH1F(
"estimatorHistTest",
"test estimator",
1050 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
1052 if(fSamplingTraining || fSamplingTesting)
1053 Data()->InitSampling(1.0,1.0,fRandomSeed);
1055 if (fSteps > 0) Log() << kINFO <<
"Inaccurate progress timing for MLP... " <<
Endl;
1063 for (
Int_t i = 0; i < nEpochs; i++) {
1065 if (fExitFromTraining)
break;
1066 fIPyCurrentIter = i;
1067 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
1068 if ((i+1)%fTestRate == 0 || (i == 0)) {
1069 if (fSamplingTraining) {
1071 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1072 Data()->CreateSampling();
1074 if (fSamplingTesting) {
1076 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1077 Data()->CreateSampling();
1083 Data()->InitSampling(1.0,1.0);
1085 Data()->InitSampling(1.0,1.0);
1090 DecaySynapseWeights(i >= lateEpoch);
1093 if ((i+1)%fTestRate == 0) {
1096 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1099 fEstimatorHistTrain->Fill( i+1, trainE );
1100 fEstimatorHistTest ->Fill( i+1, testE );
1103 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1
e-100)) {
1106 Data()->EventResult( success );
1108 SetCurrentValue( testE );
1109 if (HasConverged()) {
1110 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
1111 Int_t newEpoch =
Int_t(fSamplingEpoch*nEpochs);
1113 ResetConvergenceCounter();
1116 if (lateEpoch > i) lateEpoch = i;
1123 TString convText =
Form(
"<D^2> (train/test): %.4g/%.4g", trainE, testE );
1126 if (
Float_t(i)/nEpochs < fSamplingEpoch)
1127 progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1129 progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1144 Int_t nEvents = Data()->GetNEvents();
1148 for (
Int_t i = 0; i < nEvents; i++) index[i] = i;
1149 Shuffle(index, nEvents);
1152 for (
Int_t i = 0; i < nEvents; i++) {
1154 const Event * ev = GetEvent(index[i]);
1155 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1160 TrainOneEvent(index[i]);
1163 if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1164 AdjustSynapseWeights();
1165 if (fgPRINT_BATCH) {
1194 for (
Int_t i = 0; i <
n; i++) {
1195 j = (
Int_t) (frgen->Rndm() *
a);
1198 index[j] = index[i];
1211 Int_t numSynapses = fSynapses->GetEntriesFast();
1212 for (
Int_t i = 0; i < numSynapses; i++) {
1213 synapse = (
TSynapse*)fSynapses->At(i);
1236 if (
type == 0) desired = fOutput->GetMin();
1237 else desired = fOutput->GetMax();
1243 for (
UInt_t j = 0; j < GetNvar(); j++) {
1246 neuron = GetInputNeuron(j);
1250 ForceNetworkCalculations();
1251 UpdateNetwork(desired, eventWeight);
1265 const Event * ev = GetEvent(ievt);
1267 ForceNetworkInputs( ev );
1268 ForceNetworkCalculations();
1269 if (DoRegression()) UpdateNetwork( ev->
GetTargets(), eventWeight );
1270 if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1271 else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1279 return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin();
1288 Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1289 if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ;
1290 else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired);
1291 else Log() << kFATAL <<
"Estimator type unspecified!!" <<
Endl;
1292 error *= eventWeight;
1293 GetOutputNeuron()->SetError(error);
1294 CalculateNeuronDeltas();
1306 for (
UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1307 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1312 for (
UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1313 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1316 error *= eventWeight;
1317 GetOutputNeuron(i)->SetError(error);
1321 CalculateNeuronDeltas();
1332 Int_t numLayers = fNetwork->GetEntriesFast();
1337 for (
Int_t i = numLayers-1; i >= 0; i--) {
1341 for (
Int_t j = 0; j < numNeurons; j++) {
1358 PrintMessage(
"Minimizing Estimator with GA");
1364 fGA_SC_factor = 0.95;
1368 std::vector<Interval*> ranges;
1370 Int_t numWeights = fSynapses->GetEntriesFast();
1371 for (
Int_t ivar=0; ivar< numWeights; ivar++) {
1372 ranges.push_back(
new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1378 Double_t estimator = CalculateEstimator();
1379 Log() << kINFO <<
"GA: estimator after optimization: " << estimator <<
Endl;
1387 return ComputeEstimator( parameters );
1396 Int_t numSynapses = fSynapses->GetEntriesFast();
1398 for (
Int_t i = 0; i < numSynapses; i++) {
1399 synapse = (
TSynapse*)fSynapses->At(i);
1402 if (fUseRegulator) UpdatePriors();
1404 Double_t estimator = CalculateEstimator();
1417 Int_t numLayers = fNetwork->GetEntriesFast();
1419 for (
Int_t i = 0; i < numLayers; i++) {
1423 for (
Int_t j = 0; j < numNeurons; j++) {
1439 Int_t numLayers = fNetwork->GetEntriesFast();
1441 for (
Int_t i = numLayers-1; i >= 0; i--) {
1445 for (
Int_t j = 0; j < numNeurons; j++) {
1458 Int_t nSynapses = fSynapses->GetEntriesFast();
1459 for (
Int_t i=0;i<nSynapses;i++) {
1461 fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->
GetWeight())*(synapse->
GetWeight());
1462 fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->
GetWeight()));
1471 GetApproxInvHessian(InvH);
1472 Int_t numSynapses=fSynapses->GetEntriesFast();
1473 Int_t numRegulators=fRegulators.size();
1476 std::vector<Int_t> nWDP(numRegulators);
1477 std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1478 for (
int i=0;i<numSynapses;i++) {
1480 Int_t idx=fRegulatorIdx[i];
1482 trace[idx]+=InvH[i][i];
1483 gamma+=1-fRegulators[idx]*InvH[i][i];
1486 if (fEstimator==kMSE) {
1487 if (GetNEvents()>gamma) variance=CalculateEstimator(
Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1492 for (
int i=0;i<numRegulators;i++)
1495 fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1496 if (fRegulators[i]<0) fRegulators[i]=0;
1497 Log()<<kDEBUG<<
"R"<<i<<
":"<<fRegulators[i]<<
"\t";
1502 Log()<<kDEBUG<<
"\n"<<
"trainE:"<<trainE<<
"\ttestE:"<<testE<<
"\tvariance:"<<variance<<
"\tgamma:"<<gamma<<
Endl;
1510 Int_t numSynapses=fSynapses->GetEntriesFast();
1511 InvHessian.
ResizeTo( numSynapses, numSynapses );
1515 Int_t nEvents = GetNEvents();
1516 for (
Int_t i=0;i<nEvents;i++) {
1518 double outputValue=GetMvaValue();
1519 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1520 CalculateNeuronDeltas();
1521 for (
Int_t j = 0; j < numSynapses; j++){
1525 sens[j][0]=sensT[0][j]=synapses->
GetDelta();
1527 if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1528 else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1533 for (
Int_t i = 0; i < numSynapses; i++){
1534 InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1538 for (
Int_t i = 0; i < numSynapses; i++){
1539 InvHessian[i][i]+=1
e-6;
1554 if (!fCalculateErrors || errLower==0 || errUpper==0)
1557 Double_t MvaUpper,MvaLower,median,variance;
1558 Int_t numSynapses=fSynapses->GetEntriesFast();
1559 if (fInvHessian.GetNcols()!=numSynapses) {
1560 Log() << kWARNING <<
"inconsistent dimension " << fInvHessian.GetNcols() <<
" vs " << numSynapses <<
Endl;
1564 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1566 CalculateNeuronDeltas();
1567 for (
Int_t i = 0; i < numSynapses; i++){
1574 TMatrixD sig=sensT*fInvHessian*sens;
1576 median=GetOutputNeuron()->GetValue();
1579 Log()<<kWARNING<<
"Negative variance!!! median=" << median <<
"\tvariance(sigma^2)=" << variance <<
Endl;
1582 variance=
sqrt(variance);
1585 MvaUpper=fOutput->Eval(median+variance);
1587 *errUpper=MvaUpper-MvaValue;
1590 MvaLower=fOutput->Eval(median-variance);
1592 *errLower=MvaValue-MvaLower;
1598#ifdef MethodMLP_UseMinuit__
1603void TMVA::MethodMLP::MinuitMinimize()
1605 fNumberOfWeights = fSynapses->GetEntriesFast();
1620 for (
Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1623 parName, w[ipar], 0.1, 0, 0 );
1627 tfitter->
SetFCN( &IFCN );
1666 ((MethodMLP*)GetThisPtr())->FCN( npars, grad,
f, fitPars, iflag );
1669TTHREAD_TLS(
Int_t) nc = 0;
1670TTHREAD_TLS(
double) minf = 1000000;
1675 for (
Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1681 f = CalculateEstimator();
1684 if (
f < minf) minf =
f;
1685 for (
Int_t ipar=0; ipar<fNumberOfWeights; ipar++)
Log() << kDEBUG << fitPars[ipar] <<
" ";
1687 Log() << kDEBUG <<
"***** New estimator: " <<
f <<
" min: " << minf <<
" --> ncalls: " << nc <<
Endl;
1721 Log() << col <<
"--- Short description:" << colres <<
Endl;
1723 Log() <<
"The MLP artificial neural network (ANN) is a traditional feed-" <<
Endl;
1724 Log() <<
"forward multilayer perceptron implementation. The MLP has a user-" <<
Endl;
1725 Log() <<
"defined hidden layer architecture, while the number of input (output)" <<
Endl;
1726 Log() <<
"nodes is determined by the input variables (output classes, i.e., " <<
Endl;
1727 Log() <<
"signal and one background). " <<
Endl;
1729 Log() << col <<
"--- Performance optimisation:" << colres <<
Endl;
1731 Log() <<
"Neural networks are stable and performing for a large variety of " <<
Endl;
1732 Log() <<
"linear and non-linear classification problems. However, in contrast" <<
Endl;
1733 Log() <<
"to (e.g.) boosted decision trees, the user is advised to reduce the " <<
Endl;
1734 Log() <<
"number of input variables that have only little discrimination power. " <<
Endl;
1735 Log() <<
"" <<
Endl;
1736 Log() <<
"In the tests we have carried out so far, the MLP and ROOT networks" <<
Endl;
1737 Log() <<
"(TMlpANN, interfaced via TMVA) performed equally well, with however" <<
Endl;
1738 Log() <<
"a clear speed advantage for the MLP. The Clermont-Ferrand neural " <<
Endl;
1739 Log() <<
"net (CFMlpANN) exhibited worse classification performance in these" <<
Endl;
1740 Log() <<
"tests, which is partly due to the slow convergence of its training" <<
Endl;
1741 Log() <<
"(at least 10k training cycles are required to achieve approximately" <<
Endl;
1742 Log() <<
"competitive results)." <<
Endl;
1744 Log() << col <<
"Overtraining: " << colres
1745 <<
"only the TMlpANN performs an explicit separation of the" <<
Endl;
1746 Log() <<
"full training sample into independent training and validation samples." <<
Endl;
1747 Log() <<
"We have found that in most high-energy physics applications the " <<
Endl;
1748 Log() <<
"available degrees of freedom (training events) are sufficient to " <<
Endl;
1749 Log() <<
"constrain the weights of the relatively simple architectures required" <<
Endl;
1750 Log() <<
"to achieve good performance. Hence no overtraining should occur, and " <<
Endl;
1751 Log() <<
"the use of validation samples would only reduce the available training" <<
Endl;
1752 Log() <<
"information. However, if the performance on the training sample is " <<
Endl;
1753 Log() <<
"found to be significantly better than the one found with the inde-" <<
Endl;
1754 Log() <<
"pendent test sample, caution is needed. The results for these samples " <<
Endl;
1755 Log() <<
"are printed to standard output at the end of each training job." <<
Endl;
1757 Log() << col <<
"--- Performance tuning via configuration options:" << colres <<
Endl;
1759 Log() <<
"The hidden layer architecture for all ANNs is defined by the option" <<
Endl;
1760 Log() <<
"\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" <<
Endl;
1761 Log() <<
"neurons and the second N neurons (and so on), and where N is the number " <<
Endl;
1762 Log() <<
"of input variables. Excessive numbers of hidden layers should be avoided," <<
Endl;
1763 Log() <<
"in favour of more neurons in the first hidden layer." <<
Endl;
1764 Log() <<
"" <<
Endl;
1765 Log() <<
"The number of cycles should be above 500. As said, if the number of" <<
Endl;
1766 Log() <<
"adjustable weights is small compared to the training sample size," <<
Endl;
1767 Log() <<
"using a large number of training samples should not lead to overtraining." <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
The ROOT standard fitter based on TMinuit.
virtual Int_t SetParameter(Int_t ipar, const char *parname, Double_t value, Double_t verr, Double_t vlow, Double_t vhigh)
set initial values for a parameter
virtual Int_t ExecuteCommand(const char *command, Double_t *args, Int_t nargs)
Execute a fitter command; command : command string args : list of nargs command arguments.
virtual void SetFCN(void(*fcn)(Int_t &, Double_t *, Double_t &f, Double_t *, Int_t))
Specify the address of the fitting algorithm.
1-D histogram with a float per channel (see TH1 documentation)}
TH1 is the base class of all histogram classes in ROOT.
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Bool_t WriteOptionsReference() const
Class that contains all the data information.
std::vector< Float_t > & GetTargets()
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Float_t GetTarget(UInt_t itgt) const
Base class for TMVA fitters.
Double_t Run()
estimator function interface for fitting
Fitter using a Genetic Algorithm.
The TMVA::Interval Class.
Base class for all TMVA methods using artificial neural networks.
virtual void ProcessOptions()
do nothing specific at this moment
virtual void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
virtual Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
get the mva value generated by the NN
Multilayer Perceptron class built off of MethodANNBase.
Bool_t LineSearch(TMatrixD &Dir, std::vector< Double_t > &Buffer, Double_t *dError=0)
void GetHelpMessage() const
get help message text
void BackPropagationMinimize(Int_t nEpochs)
minimize estimator / train network with back propagation algorithm
Double_t GetMSEErr(const Event *ev, UInt_t index=0)
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
get the mva value generated by the NN
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void AdjustSynapseWeights()
just adjust the synapse weights (should be called in batch mode)
void SteepestDir(TMatrixD &Dir)
void TrainOneEpoch()
train network over a single epoch/cycle of events
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
MLP can handle classification with 2 classes and regression with one regression-target.
Bool_t GetHessian(TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta)
Double_t ComputeEstimator(std::vector< Double_t > ¶meters)
this function is called by GeneticANN for GA optimization
void InitializeLearningRates()
initialize learning rates of synapses, used only by back propagation
void CalculateNeuronDeltas()
have each neuron calculate its delta by back propagation
Double_t DerivDir(TMatrixD &Dir)
Double_t GetCEErr(const Event *ev, UInt_t index=0)
virtual ~MethodMLP()
destructor nothing to be done
void SetDir(TMatrixD &Hessian, TMatrixD &Dir)
void Shuffle(Int_t *index, Int_t n)
Input:
void SimulateEvent(const Event *ev)
void SetDirWeights(std::vector< Double_t > &Origin, TMatrixD &Dir, Double_t alpha)
void SetGammaDelta(TMatrixD &Gamma, TMatrixD &Delta, std::vector< Double_t > &Buffer)
Double_t EstimatorFunction(std::vector< Double_t > ¶meters)
interface to the estimate
void GetApproxInvHessian(TMatrixD &InvHessian, bool regulate=true)
void BFGSMinimize(Int_t nEpochs)
train network with BFGS algorithm
void UpdateSynapses()
update synapse error fields and adjust the weights (if in sequential mode)
void Init()
default initializations
void ProcessOptions()
process user options
void TrainOneEvent(Int_t ievt)
train network over a single event this uses the new event model
Double_t GetDesiredOutput(const Event *ev)
get the desired output of this event
void GeneticMinimize()
create genetics class similar to GeneticCut give it vector of parameter ranges (parameters = weights)...
void DecaySynapseWeights(Bool_t lateEpoch)
decay synapse weights in last 10 epochs, lower learning rate even more to find a good minimum
void TrainOneEventFast(Int_t ievt, Float_t *&branchVar, Int_t &type)
fast per-event training
void UpdateNetwork(Double_t desired, Double_t eventWeight=1.0)
update the network based on how closely the output matched the desired output
MethodMLP(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor
void DeclareOptions()
define the options (their key words) that can be set in the option string
Double_t CalculateEstimator(Types::ETreeType treeType=Types::kTraining, Int_t iEpoch=-1)
calculate the estimator that training is attempting to minimize
Neuron class used by TMVA artificial neural network methods.
void AdjustSynapseWeights()
adjust the pre-synapses' weights for each neuron (input neuron has no pre-synapse) this method should...
void ForceValue(Double_t value)
force the value, typically for input and bias neurons
void UpdateSynapsesSequential()
update the pre-synapses for each neuron (input neuron has no pre-synapse) this method should only be ...
void UpdateSynapsesBatch()
update and adjust the pre-synapses for each neuron (input neuron has no pre-synapse) this method shou...
void CalculateDelta()
calculate error field
Synapse class used by TMVA artificial neural network methods.
void SetWeight(Double_t weight)
set synapse weight
void SetDEDw(Double_t DEDw)
void SetLearningRate(Double_t rate)
void DecayLearningRate(Double_t rate)
void CalculateDelta()
calculate/adjust the error field for this synapse
Timing information for training and evaluation of MVA methods.
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Singleton class for Global types used by TMVA.
virtual TMatrixTBase< Element > & UnitMatrix()
Make a unit matrix (matrix need not be a square one).
virtual TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1)
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
TMatrixT< Element > & Transpose(const TMatrixT< Element > &source)
Transpose matrix source.
Int_t GetEntriesFast() const
TObject * At(Int_t idx) const
This is a simple weighted bidirectional connection between two neurons.
void SetWeight(Double_t w)
Sets the weight of the synapse.
MsgLogger & Endl(MsgLogger &ml)
Double_t Sqrt(Double_t x)
static void output(int code)