#ifdef MethodMLP_UseMinuit__

   fUseRegulator(false), fCalculateErrors(false),
   fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
   fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
   fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
   fSamplingTraining(false), fSamplingTesting(false),
   fLastAlpha(0.0), fTau(0.),
   fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
   fBPMode(kSequential), fBpModeS("None"),
   fBatchSize(0), fTestRate(0), fEpochMon(false),
   fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
   fGA_SC_rate(0), fGA_SC_factor(0.0),
   fDeviationsFromTargets(0),
   fUseRegulator(false), fCalculateErrors(false),
   fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
   fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
   fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
   fSamplingTraining(false), fSamplingTesting(false),
   fLastAlpha(0.0), fTau(0.),
   fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
   fBPMode(kSequential), fBpModeS("None"),
   fBatchSize(0), fTestRate(0), fEpochMon(false),
   fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
   fGA_SC_rate(0), fGA_SC_factor(0.0),
   fDeviationsFromTargets(0),
   SetSignalReferenceCut( 0.5 );
#ifdef MethodMLP_UseMinuit__
   DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
                    "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");

   DeclareOptionRef(fLearnRate=0.02,    "LearningRate",    "ANN learning rate parameter");
   DeclareOptionRef(fDecayRate=0.01,    "DecayRate",       "Decay rate for learning parameter");
   DeclareOptionRef(fTestRate =10,      "TestRate",        "Test for overtraining performed at each #th epochs");
   DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
   DeclareOptionRef(fSamplingFraction=1.0, "Sampling",           "Only 'Sampling' (randomly selected) events are trained in each epoch");
   DeclareOptionRef(fSamplingEpoch=1.0,    "SamplingEpoch",      "Sampling is used for the first 'SamplingEpoch' epochs; afterwards, all events are taken for training");
   DeclareOptionRef(fSamplingWeight=1.0,   "SamplingImportance", "The sampling weights of events in epochs which are successful (worse estimator than before) are multiplied with SamplingImportance, otherwise they are divided.");
   DeclareOptionRef(fSamplingTraining=kTRUE,  "SamplingTraining", "The training sample is sampled");
   DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting",  "The testing sample is sampled");

   DeclareOptionRef(fResetStep=50,  "ResetStep", "How often BFGS should reset history");
   DeclareOptionRef(fTau      =3.0, "Tau",       "LineSearch \"size step\"");

   DeclareOptionRef(fBpModeS="sequential", "BPMode",
                    "Back-propagation learning mode: sequential or batch");
   AddPreDefVal(TString("sequential"));
   AddPreDefVal(TString("batch"));

   DeclareOptionRef(fBatchSize=-1, "BatchSize",
                    "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");

   DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
                    "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fSteps=-1, "ConvergenceTests",
                    "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
                    "Use regulator to avoid over-training");
   DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
                    "Maximum times of regulator update");
   DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
                    "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value");

   DeclareOptionRef(fWeightRange=1.0, "WeightRange",
                    "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kINFO << "Will ignore negative events in training!" << Endl;
   }

   if      (fTrainMethodS == "BP"  ) fTrainingMethod = kBP;
   else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
   else if (fTrainMethodS == "GA"  ) fTrainingMethod = kGA;

   if      (fBpModeS == "sequential") fBPMode = kSequential;
   else if (fBpModeS == "batch")      fBPMode = kBatch;
   if (fBPMode == kBatch) {
      Int_t numEvents = Data()->GetNEvents();
      if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
   }
   Log() << kDEBUG << "Initialize learning rates" << Endl;

   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);

      Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;

   Data()->SetCurrentType(treeType);
   if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
      histS = new TH1F( nameS, nameS, nbin, -limit, limit );
      histB = new TH1F( nameB, nameB, nbin, -limit, limit );
   }

   Int_t  nEvents  = GetNEvents();
   UInt_t nClasses = DataInfo().GetNClasses();
   UInt_t nTgts    = DataInfo().GetNTargets();

   if( fWeightRange < 1.f ){
      fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
   }
   for (Int_t i = 0; i < nEvents; i++) {

      const Event* ev = GetEvent(i);

      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()

      ForceNetworkInputs( ev );
      ForceNetworkCalculations();
      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
            v = GetOutputNeuron( itgt )->GetActivationValue();
         }
      }
      else if (DoMulticlass() ) {
         if (fEstimator==kCE){
            for (UInt_t icls = 0; icls < nClasses; icls++) {
               Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
               norm += exp( activationValue );
               d = exp( activationValue );
            }
         }
         for (UInt_t icls = 0; icls < nClasses; icls++) {
            Double_t desired = (icls==cls) ? 1.0 : 0.0;
            v = GetOutputNeuron( icls )->GetActivationValue();
            d = (desired-v)*(desired-v);
         }
      }

      Double_t desired = DataInfo().IsSignal(ev) ? 1. : 0.;
      v = GetOutputNeuron()->GetActivationValue();
      if (fEstimator==kMSE) d = (desired-v)*(desired-v);

      if( fDeviationsFromTargets )
         fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
      if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
      else if                      (histB != 0) histB->Fill( float(v), float(w) );
   if( fDeviationsFromTargets ) {
      std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());

      Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
      Float_t weightRangeCut      = fWeightRange*sumOfWeights;

      for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
         float deviation = (*itDev).first;
         float devWeight = (*itDev).second;
         weightSum += devWeight;
         if( weightSum <= weightRangeCut ) {
            estimator += devWeight*deviation;
         }
      }

      sumOfWeights = sumOfWeightsInRange;
      delete fDeviationsFromTargets;
   }
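   // Note on the block above: when WeightRange < 1, the per-event deviations d (paired with
   // the event weights w) are collected, sorted in ascending order, and only events up to the
   // fraction 'fWeightRange' of the total event weight contribute to the estimator; the
   // normalization is reduced to that same weight fraction, so the events with the largest
   // deviations are excluded from the estimator.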
   if (histS != 0) fEpochMonHistS.push_back( histS );
   if (histB != 0) fEpochMonHistB.push_back( histB );

   if (DoRegression())      estimator = estimator/Float_t(sumOfWeights);
   else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights);
   else                     estimator = estimator/Float_t(sumOfWeights);

   Data()->SetCurrentType( saveType );

   if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
      CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
   }
      Log() << kFATAL << "ANN Network is not initialized, doing it now!" << Endl;
      SetAnalysisType(GetAnalysisType());

   Log() << kDEBUG << "reinitialize learning rates" << Endl;
   InitializeLearningRates();

   PrintMessage("Training Network");
   Int_t nEvents   = GetNEvents();
   Int_t nSynapses = fSynapses->GetEntriesFast();
   if (nSynapses > nEvents)
      Log() << kWARNING << "ANN too complicated: #events=" << nEvents << "\t#synapses=" << nSynapses << Endl;
   fIPyMaxIter = nEpochs;
   if (fInteractive && fInteractive->NotInitialized()){
      std::vector<TString> titles = {"Error on training set", "Error on test set"};
      fInteractive->Init(titles);
   }
#ifdef MethodMLP_UseMinuit__
   if (useMinuit) MinuitMinimize();

   if      (fTrainingMethod == kGA)   GeneticMinimize();
   else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
   else                               BackPropagationMinimize(nEpochs);
      Log() << kINFO << "Finalizing handling of Regulator terms, trainE=" << trainE << " testE=" << testE << Endl;

      Log() << kINFO << "Done with handling of Regulator terms" << Endl;

   if( fCalculateErrors || fUseRegulator )
   {
      Int_t numSynapses = fSynapses->GetEntriesFast();
      fInvHessian.ResizeTo(numSynapses,numSynapses);
      GetApproxInvHessian( fInvHessian, false );
   }
   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights  = nSynapses;

   for (Int_t i=0;i<nSynapses;i++) {

   std::vector<Double_t> buffer( nWeights );
   for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;

   TMatrixD Hessian ( nWeights, nWeights );

   Int_t RegUpdateTimes = 0;

   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0,1.0,fRandomSeed);

   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   for (Int_t i = 0; i < nEpochs; i++) {

      if (fExitFromTraining) break;

      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         Data()->InitSampling(1.0,1.0);
         Data()->InitSampling(1.0,1.0);
      }
      SetGammaDelta( Gamma, Delta, buffer );

      if (i % fResetStep == 0 && i<0.5*nEpochs) {

      if (GetHessian( Hessian, Gamma, Delta )) {

      else SetDir( Hessian, Dir );

      if (DerivDir( Dir ) > 0) {

      if (LineSearch( Dir, buffer, &dError )) {

         if (LineSearch(Dir, buffer, &dError)) {
            Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;

      if (dError<0) Log() << kWARNING << "\nnegative dError=" << dError << Endl;
      if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
         Log() << kDEBUG << "\n\nUpdate regulators " << RegUpdateTimes << " on epoch " << i << "\tdError=" << dError << Endl;
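      // Shape of one BFGS epoch, as reflected by the calls above: build Gamma (change of
      // gradients) and Delta (change of weights) from the previous step, periodically reset
      // the history (ResetStep), update the approximate Hessian, derive the search direction
      // (apparently falling back to the steepest-descent direction when the quasi-Newton
      // direction is unusable, e.g. DerivDir > 0), and perform a line search along it. When
      // the regulator is enabled, the regulators are re-estimated once the accumulated error
      // change becomes small. This summary is an interpretation of the visible calls.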
      if ((i+1)%fTestRate == 0) {

         if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);

         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );

         if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
            Data()->EventResult( success );

         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               ResetConvergenceCounter();
      TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE, i );

      if (Float_t(i)/nEpochs < fSamplingEpoch)
         progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
      else
         progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());

      Float_t progress2 = 100.0*RegUpdateTimes/fUpdateLimit;
      if (progress2>progress) progress=progress2;

      if (progress<i) progress=i;
   Int_t nWeights = fSynapses->GetEntriesFast();

   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {

   for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];

   for (Int_t i=0;i<nSynapses;i++)

   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
   Int_t nEvents = GetNEvents();
   Int_t nPosEvents = nEvents;
   for (Int_t i=0;i<nEvents;i++) {

      const Event* ev = GetEvent(i);
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()

      for (Int_t j=0;j<nSynapses;j++) {

   for (Int_t i=0;i<nSynapses;i++) {

      if (fUseRegulator) DEDw += fPriorDev[i];
      synapse->SetDEDw( DEDw / nPosEvents );
   ForceNetworkInputs( ev );
   ForceNetworkCalculations();

   if (DoRegression()) {
      UInt_t ntgt = DataInfo().GetNTargets();
      for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
         Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( itgt )->SetError(error);
      }
   }
   else if (DoMulticlass()) {
      UInt_t nClasses = DataInfo().GetNClasses();
      for (UInt_t icls = 0; icls < nClasses; icls++) {
         Double_t desired = ( cls==icls ? 1.0 : 0.0 );
         Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( icls )->SetError(error);
      }
   }
   else {
      Double_t desired = GetDesiredOutput( ev );
      if (fEstimator==kMSE)     error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight;
      else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired);
      GetOutputNeuron()->SetError(error);
   }

   CalculateNeuronDeltas();
   for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
   Int_t nSynapses = fSynapses->GetEntriesFast();

   for (Int_t i=0;i<nSynapses;i++) {
      Dir[IDX++][0] = -synapse->GetDEDw();

   Int_t nSynapses = fSynapses->GetEntriesFast();

   for (Int_t i=0;i<nSynapses;i++) {
      DEDw[IDX++][0] = synapse->GetDEDw();

   dir = Hessian * DEDw;
   for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];

   Int_t nSynapses = fSynapses->GetEntriesFast();

   for (Int_t i=0;i<nSynapses;i++) {
      Result += Dir[IDX++][0] * synapse->GetDEDw();
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights  = nSynapses;

   std::vector<Double_t> Origin(nWeights);
   for (Int_t i=0;i<nSynapses;i++) {

   if (alpha2 < 0.01)     alpha2 = 0.01;
   else if (alpha2 > 2.0) alpha2 = 2.0;

   SetDirWeights( Origin, Dir, alpha2 );

   for (Int_t i=0;i<100;i++) {

      SetDirWeights(Origin, Dir, alpha3);

   SetDirWeights(Origin, Dir, 0.);

   for (Int_t i=0;i<100;i++) {

      Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
      alpha2 = -alpha_original;

      SetDirWeights(Origin, Dir, alpha2);

   SetDirWeights(Origin, Dir, 0.);
   Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
   if (alpha1>0 && alpha2>0 && alpha3 > 0) {
      fLastAlpha = 0.5 * (alpha1 + alpha3 -
                          (err3 - err1) / ( (err3 - err2) / ( alpha3 - alpha2 )
                                          - (err2 - err1) / ( alpha2 - alpha1 ) ));
   }

   fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;

   SetDirWeights(Origin, Dir, fLastAlpha);
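   // The fLastAlpha expression above is the standard three-point parabolic interpolation:
   // it is the abscissa of the vertex of the parabola through (alpha1, err1), (alpha2, err2)
   // and (alpha3, err3), i.e. the estimated minimum of the error along the search direction.
   // The result is clamped to at most 10000 before the weights are moved by fLastAlpha
   // along Dir.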
   if (finalError > err1) {
      Log() << kWARNING << "Line search increased error! Something is wrong."
            << " fLastAlpha=" << fLastAlpha << " al123=" << alpha1 << " "
            << alpha2 << " " << alpha3 << " err1=" << err1 << " errfinal=" << finalError << Endl;
   }

   for (Int_t i=0;i<nSynapses;i++) {
      buffer[IDX] = synapse->GetWeight() - Origin[IDX];

   if (dError) (*dError) = (errOrigin-finalError)/finalError;
   Int_t nSynapses = fSynapses->GetEntriesFast();

   for (Int_t i=0;i<nSynapses;i++) {
      synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );

   if (fUseRegulator) UpdatePriors();
   Int_t  nEvents = GetNEvents();
   UInt_t ntgts   = GetNTargets();

   for (Int_t i=0;i<nEvents;i++) {
      const Event* ev = GetEvent(i);

      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()

      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
            error += GetMSEErr( ev, itgt );
         }
      }
      else if ( DoMulticlass() ){
         for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
            error += GetMSEErr( ev, icls );
         }
      }
      else {
         if (fEstimator==kMSE)     error = GetMSEErr( ev );
         else if (fEstimator==kCE) error = GetCEErr( ev );
      }

   if (fUseRegulator) Result += fPrior;
   if (Result<0) Log() << kWARNING << "\nNegative Error!!! :" << Result-fPrior << "+" << fPrior << Endl;
   Double_t output = GetOutputNeuron( index )->GetActivationValue();

   if (DoRegression())      target = ev->GetTarget( index );
   else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
   else                     target = GetDesiredOutput( ev );

   Double_t output = GetOutputNeuron( index )->GetActivationValue();

   if (DoRegression())      target = ev->GetTarget( index );
   else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
   else                     target = GetDesiredOutput( ev );
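   // For reference (conventional definitions; the exact prefactors used by GetMSEErr and
   // GetCEErr are not visible in this excerpt):
   //   MSE per event:            0.5 * (output - target)^2
   //   cross-entropy per event:  -[ target*log(output) + (1-target)*log(1-output) ]
   // with 'output' the activation of the output neuron and 'target' chosen above according
   // to regression / multiclass / two-class mode.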
   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );

   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0,1.0,fRandomSeed);

   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   for (Int_t i = 0; i < nEpochs; i++) {

      if (fExitFromTraining) break;
      fIPyCurrentIter = i;
      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         Data()->InitSampling(1.0,1.0);
         Data()->InitSampling(1.0,1.0);
      }
      DecaySynapseWeights(i >= lateEpoch);

      if ((i+1)%fTestRate == 0) {

         if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);

         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );

         if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
            Data()->EventResult( success );

         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               ResetConvergenceCounter();

            if (lateEpoch > i) lateEpoch = i;

      TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
      if (Float_t(i)/nEpochs < fSamplingEpoch)
         progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
      else
         progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
   Int_t nEvents = Data()->GetNEvents();

   for (Int_t i = 0; i < nEvents; i++) index[i] = i;
   Shuffle(index, nEvents);

   for (Int_t i = 0; i < nEvents; i++) {

      const Event * ev = GetEvent(index[i]);
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()

      TrainOneEvent(index[i]);

      if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
         AdjustSynapseWeights();
         if (fgPRINT_BATCH) {
   for (Int_t i = 0; i < n; i++) {
      j = (Int_t) (frgen->Rndm() * a);

      index[j] = index[i];
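   // Shuffle(): the loop above performs an in-place random swap of the event indices
   // (j is drawn via frgen->Rndm()), so the events of each epoch are presented to the
   // network in random order.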
   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);

   if (type == 0) desired = fOutput->GetMin();
   else           desired = fOutput->GetMax();

   for (UInt_t j = 0; j < GetNvar(); j++) {

      neuron = GetInputNeuron(j);

   ForceNetworkCalculations();
   UpdateNetwork(desired, eventWeight);
   const Event * ev = GetEvent(ievt);

   ForceNetworkInputs( ev );
   ForceNetworkCalculations();
   if (DoRegression())      UpdateNetwork( ev->GetTargets(), eventWeight );
   else if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
   else                     UpdateNetwork( GetDesiredOutput( ev ), eventWeight );

   return DataInfo().IsSignal(ev) ? fOutput->GetMax() : fOutput->GetMin();
   Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
   if (fEstimator==kMSE)     error = GetOutputNeuron()->GetActivationValue() - desired;
   else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired);
   else Log() << kFATAL << "Estimator type unspecified!!" << Endl;
   error *= eventWeight;
   GetOutputNeuron()->SetError(error);
   CalculateNeuronDeltas();
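   // Both branches set the output-neuron error to dE/da, the derivative of the estimator
   // with respect to the output activation a, for a binary target d in {0,1}:
   //   MSE:            E = 0.5*(a - d)^2              ->  dE/da = a - d
   //   cross-entropy:  E = -[d*ln(a) + (1-d)*ln(1-a)] ->  dE/da = -1/(a - 1 + d)
   // which is exactly the expression used above; the event weight is applied afterwards.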
   for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
      Double_t act = GetOutputNeuron(i)->GetActivationValue();

   for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
      Double_t act = GetOutputNeuron(i)->GetActivationValue();

      error *= eventWeight;
      GetOutputNeuron(i)->SetError(error);
   }

   CalculateNeuronDeltas();
   Int_t numLayers = fNetwork->GetEntriesFast();

   for (Int_t i = numLayers-1; i >= 0; i--) {

      for (Int_t j = 0; j < numNeurons; j++) {
   PrintMessage("Minimizing Estimator with GA");

   fGA_SC_factor = 0.95;

   std::vector<Interval*> ranges;

   Int_t numWeights = fSynapses->GetEntriesFast();
   for (Int_t ivar=0; ivar< numWeights; ivar++) {
      ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));

   Double_t estimator = CalculateEstimator();
   Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
   return ComputeEstimator( parameters );

   Int_t numSynapses = fSynapses->GetEntriesFast();

   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);

   if (fUseRegulator) UpdatePriors();

   Double_t estimator = CalculateEstimator();
   for (Int_t i = 0; i < numLayers; i++) {

      for (Int_t j = 0; j < numNeurons; j++) {

   for (Int_t i = numLayers-1; i >= 0; i--) {

      for (Int_t j = 0; j < numNeurons; j++) {

   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {

      fPrior += 0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
      fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
   GetApproxInvHessian(InvH);
   Int_t numSynapses   = fSynapses->GetEntriesFast();
   Int_t numRegulators = fRegulators.size();

   std::vector<Int_t>    nWDP(numRegulators);
   std::vector<Double_t> trace(numRegulators), weightSum(numRegulators);
   for (int i=0;i<numSynapses;i++) {

      Int_t idx = fRegulatorIdx[i];

      trace[idx] += InvH[i][i];
      gamma      += 1 - fRegulators[idx]*InvH[i][i];

   if (fEstimator==kMSE) {

   for (int i=0;i<numRegulators;i++)
   {
      fRegulators[i] = variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
      if (fRegulators[i]<0) fRegulators[i]=0;
      Log() << kDEBUG << "R" << i << ":" << fRegulators[i] << "\t";
   }

   Log() << kDEBUG << "\n" << "trainE:" << trainE << "\ttestE:" << testE << "\tvariance:" << variance << "\tgamma:" << gamma << Endl;
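   // Interpretation of the update above (a reading of the code, not part of the original
   // comments): it has the structure of a Bayesian, evidence-approximation re-estimation of
   // the weight-decay regulators. gamma accumulates 1 - alpha_i * (H^-1)_ii, i.e. the
   // effective number of well-determined weights, a noise variance is estimated for the MSE
   // estimator, and each regulator is re-set from the number of synapses in its group
   // relative to their summed squared weights plus a variance-scaled trace term.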
   Int_t numSynapses = fSynapses->GetEntriesFast();
   InvHessian.ResizeTo( numSynapses, numSynapses );

   Int_t nEvents = GetNEvents();
   for (Int_t i=0;i<nEvents;i++) {

      double outputValue = GetMvaValue();
      GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
      CalculateNeuronDeltas();
      for (Int_t j = 0; j < numSynapses; j++){

         sens[j][0] = sensT[0][j] = synapses->GetDelta();

      if (fEstimator==kMSE )    InvHessian += sens*sensT;
      else if (fEstimator==kCE) InvHessian += (outputValue*(1-outputValue))*sens*sensT;

   for (Int_t i = 0; i < numSynapses; i++){
      InvHessian[i][i] += fRegulators[fRegulatorIdx[i]];

   for (Int_t i = 0; i < numSynapses; i++){
      InvHessian[i][i] += 1e-6;
   if (!fCalculateErrors || errLower==0 || errUpper==0)

   Double_t MvaUpper, MvaLower, median, variance;
   Int_t numSynapses = fSynapses->GetEntriesFast();
   if (fInvHessian.GetNcols() != numSynapses) {
      Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
   }

   GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));

   CalculateNeuronDeltas();
   for (Int_t i = 0; i < numSynapses; i++){

   TMatrixD sig = sensT*fInvHessian*sens;

   median = GetOutputNeuron()->GetValue();

      Log() << kWARNING << "Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance << Endl;

   variance = sqrt(variance);

   MvaUpper  = fOutput->Eval(median+variance);
   *errUpper = MvaUpper-MvaValue;

   MvaLower  = fOutput->Eval(median-variance);
   *errLower = MvaValue-MvaLower;
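   // Usage note (a sketch, not from this file): the error band computed above is only
   // filled when the method was booked with the CalculateErrors option, since it relies on
   // the inverse Hessian stored at the end of training. A call of the form
   //   Double_t errLo = 0, errUp = 0;
   //   Double_t mva = mlp->GetMvaValue(&errLo, &errUp);   // 'mlp' is a MethodMLP pointer, hypothetical variable
   // then returns the MVA value together with its lower/upper uncertainties.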
#ifdef MethodMLP_UseMinuit__

void TMVA::MethodMLP::MinuitMinimize()
{
   fNumberOfWeights = fSynapses->GetEntriesFast();

   for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {

      parName, w[ipar], 0.1, 0, 0 );

   tfitter->SetFCN( &IFCN );

   ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );

TTHREAD_TLS(Int_t)  nc   = 0;
TTHREAD_TLS(double) minf = 1000000;

   for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {

   f = CalculateEstimator();

   if (f < minf) minf = f;
   for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";

   Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
   Log() << col << "--- Short description:" << colres << Endl;

   Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
   Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
   Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
   Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
   Log() << "signal and one background). " << Endl;

   Log() << col << "--- Performance optimisation:" << colres << Endl;

   Log() << "Neural networks are stable and performing for a large variety of " << Endl;
   Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
   Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
   Log() << "number of input variables that have only little discrimination power. " << Endl;

   Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
   Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
   Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
   Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
   Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
   Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
   Log() << "competitive results)." << Endl;
   Log() << col << "Overtraining: " << colres
         << "only the TMlpANN performs an explicit separation of the" << Endl;
   Log() << "full training sample into independent training and validation samples." << Endl;
   Log() << "We have found that in most high-energy physics applications the " << Endl;
   Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
   Log() << "constrain the weights of the relatively simple architectures required" << Endl;
   Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
   Log() << "the use of validation samples would only reduce the available training" << Endl;
   Log() << "information. However, if the performance on the training sample is " << Endl;
   Log() << "found to be significantly better than the one found with the inde-" << Endl;
   Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
   Log() << "are printed to standard output at the end of each training job." << Endl;

   Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
   Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
   Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
   Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
   Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
   Log() << "in favour of more neurons in the first hidden layer." << Endl;

   Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
   Log() << "adjustable weights is small compared to the training sample size," << Endl;
   Log() << "using a large number of training samples should not lead to overtraining." << Endl;