137#pragma warning ( disable : 4355 )
158 fMultiGraph =
nullptr;
172 std::cerr << kERROR <<
"IPythonInteractive::Init: already initialized..." << std::endl;
// Create one TGraph per entry of graphTitles. Each title is used both as the
// graph's title and as its name; the current `color` is applied to fill, line
// and marker, and the graph is then registered with fMultiGraph.
// The graph is allocated with a raw `new` and stored in fGraphs; where it is
// released is not visible in this excerpt.
176 for(
auto& title : graphTitles){
177 fGraphs.push_back(
new TGraph() );
178 fGraphs.back()->SetTitle(title);
179 fGraphs.back()->SetName(title);
180 fGraphs.back()->SetFillColor(color);
181 fGraphs.back()->SetLineColor(color);
182 fGraphs.back()->SetMarkerColor(color);
183 fMultiGraph->Add(fGraphs.back());
195 for(
Int_t i=0; i<fNumGraphs; i++){
// Two-graph case: resize graphs 0 and 1 to fIndex+1 points, then write the new
// sample at position fIndex -- (x, y1) into graph 0 and (x, y2) into graph 1.
209 fGraphs[0]->Set(fIndex+1);
210 fGraphs[1]->Set(fIndex+1);
211 fGraphs[0]->SetPoint(fIndex,
x, y1);
212 fGraphs[1]->SetPoint(fIndex,
x, y2);
// Generic case: for each of the fNumGraphs graphs, resize to fIndex+1 points
// and write point fIndex. dat[0] is the shared x value, dat[i+1] is the y
// value of graph i, so `dat` must hold at least fNumGraphs+1 entries.
225 for(
Int_t i=0; i<fNumGraphs;i++){
226 fGraphs[i]->Set(fIndex+1);
227 fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
247 fAnalysisType (
Types::kNoAnalysisType ),
248 fRegressionReturnVal ( 0 ),
249 fMulticlassReturnVal ( 0 ),
250 fDataSetInfo ( dsi ),
251 fSignalReferenceCut ( 0.5 ),
252 fSignalReferenceCutOrientation( 1. ),
253 fVariableTransformType (
Types::kSignal ),
254 fJobName ( jobName ),
255 fMethodName ( methodTitle ),
256 fMethodType ( methodType ),
260 fConstructedFromWeightFile (
kFALSE ),
262 fMethodBaseDir ( 0 ),
265 fModelPersistence (
kTRUE),
276 fSplTrainEffBvsS ( 0 ),
277 fVarTransformString (
"None" ),
278 fTransformationPointer ( 0 ),
279 fTransformation ( dsi, methodTitle ),
281 fVerbosityLevelString (
"Default" ),
284 fIgnoreNegWeightsInTraining(
kFALSE ),
286 fBackgroundClass ( 0 ),
311 fAnalysisType (
Types::kNoAnalysisType ),
312 fRegressionReturnVal ( 0 ),
313 fMulticlassReturnVal ( 0 ),
314 fDataSetInfo ( dsi ),
315 fSignalReferenceCut ( 0.5 ),
316 fVariableTransformType (
Types::kSignal ),
318 fMethodName (
"MethodBase" ),
319 fMethodType ( methodType ),
321 fTMVATrainingVersion ( 0 ),
322 fROOTTrainingVersion ( 0 ),
323 fConstructedFromWeightFile (
kTRUE ),
325 fMethodBaseDir ( 0 ),
328 fModelPersistence (
kTRUE),
329 fWeightFile ( weightFile ),
339 fSplTrainEffBvsS ( 0 ),
340 fVarTransformString (
"None" ),
341 fTransformationPointer ( 0 ),
342 fTransformation ( dsi,
"" ),
344 fVerbosityLevelString (
"Default" ),
347 fIgnoreNegWeightsInTraining(
kFALSE ),
349 fBackgroundClass ( 0 ),
367 if (!fSetupCompleted) Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Calling destructor of method which got never setup" <<
Endl;
// Release the heap objects held through raw member pointers.
// fInputVars is cleared and deleted, fRanking is deleted; unlike the members
// that follow, neither pointer is reset to 0 afterwards.
370 if (fInputVars != 0) { fInputVars->clear();
delete fInputVars; }
371 if (fRanking != 0)
delete fRanking;
// PDF objects: delete and null each pointer.
374 if (fDefaultPDF!= 0) {
delete fDefaultPDF; fDefaultPDF = 0; }
375 if (fMVAPdfS != 0) {
delete fMVAPdfS; fMVAPdfS = 0; }
376 if (fMVAPdfB != 0) {
delete fMVAPdfB; fMVAPdfB = 0; }
// fSpl* members: same delete-and-null pattern for each one.
379 if (fSplS) {
delete fSplS; fSplS = 0; }
380 if (fSplB) {
delete fSplB; fSplB = 0; }
381 if (fSpleffBvsS) {
delete fSpleffBvsS; fSpleffBvsS = 0; }
382 if (fSplRefS) {
delete fSplRefS; fSplRefS = 0; }
383 if (fSplRefB) {
delete fSplRefB; fSplRefB = 0; }
384 if (fSplTrainRefS) {
delete fSplTrainRefS; fSplTrainRefS = 0; }
385 if (fSplTrainRefB) {
delete fSplTrainRefB; fSplTrainRefB = 0; }
386 if (fSplTrainEffBvsS) {
delete fSplTrainEffBvsS; fSplTrainEffBvsS = 0; }
388 for (
Int_t i = 0; i < 2; i++ ) {
389 if (fEventCollections.at(i)) {
390 for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
391 it != fEventCollections.at(i)->end(); ++it) {
394 delete fEventCollections.at(i);
395 fEventCollections.at(i) = 0;
399 if (fRegressionReturnVal)
delete fRegressionReturnVal;
400 if (fMulticlassReturnVal)
delete fMulticlassReturnVal;
410 if (fSetupCompleted) Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Calling SetupMethod for the second time" <<
Endl;
412 DeclareBaseOptions();
415 fSetupCompleted =
kTRUE;
425 ProcessBaseOptions();
435 CheckForUnusedOptions();
443 SetConfigDescription(
"Configuration options for classifier architecture and tuning" );
451 fSplTrainEffBvsS = 0;
458 fTxtWeightsOnly =
kTRUE;
// Build the list of input-variable labels: one entry per variable index in
// [0, GetNvar()), taken from the dataset's VariableInfo.
468 fInputVars =
new std::vector<TString>;
469 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
470 fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
// The return-value buffers start out unset.
472 fRegressionReturnVal = 0;
473 fMulticlassReturnVal = 0;
// Two event-collection slots, both initially empty (null).
475 fEventCollections.resize( 2 );
476 fEventCollections.at(0) = 0;
477 fEventCollections.at(1) = 0;
// Cache the class numbers of "Signal" and "Background" when the dataset
// defines classes with those names; otherwise the members are left untouched
// by these statements.
480 if (DataInfo().GetClassInfo(
"Signal") != 0) {
481 fSignalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
483 if (DataInfo().GetClassInfo(
"Background") != 0) {
484 fBackgroundClass = DataInfo().GetClassInfo(
"Background")->GetNumber();
// Configurable metadata: the config name is "Method" + the method type name.
487 SetConfigDescription(
"Configuration options for MVA method" );
488 SetConfigName(
TString(
"Method") + GetMethodTypeName() );
511 DeclareOptionRef( fVerbose,
"V",
"Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
513 DeclareOptionRef( fVerbosityLevelString=
"Default",
"VerbosityLevel",
"Verbosity level" );
514 AddPreDefVal(
TString(
"Default") );
515 AddPreDefVal(
TString(
"Debug") );
516 AddPreDefVal(
TString(
"Verbose") );
517 AddPreDefVal(
TString(
"Info") );
518 AddPreDefVal(
TString(
"Warning") );
519 AddPreDefVal(
TString(
"Error") );
520 AddPreDefVal(
TString(
"Fatal") );
524 fTxtWeightsOnly =
kTRUE;
527 DeclareOptionRef( fVarTransformString,
"VarTransform",
"List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
529 DeclareOptionRef( fHelp,
"H",
"Print method-specific help message" );
531 DeclareOptionRef( fHasMVAPdfs,
"CreateMVAPdfs",
"Create PDFs for classifier outputs (signal and background)" );
533 DeclareOptionRef( fIgnoreNegWeightsInTraining,
"IgnoreNegWeightsInTraining",
534 "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
// Build the three PDF helper objects as a chain: each one is constructed from
// the option string held by the previous object and then runs the
// Declare/Parse/Process option sequence on itself.
// Step 1: the default PDF, fed with this method's own option string.
546 fDefaultPDF =
new PDF(
TString(GetName())+
"_PDF", GetOptions(),
"MVAPdf" );
547 fDefaultPDF->DeclareOptions();
548 fDefaultPDF->ParseOptions();
549 fDefaultPDF->ProcessOptions();
// Step 2: the background PDF, fed with fDefaultPDF's option string. fDefaultPDF
// is also passed as the last constructor argument (presumably as the source of
// default settings -- confirm against the PDF constructor).
550 fMVAPdfB =
new PDF(
TString(GetName())+
"_PDFBkg", fDefaultPDF->GetOptions(),
"MVAPdfBkg", fDefaultPDF );
551 fMVAPdfB->DeclareOptions();
552 fMVAPdfB->ParseOptions();
553 fMVAPdfB->ProcessOptions();
// Step 3: the signal PDF, fed with fMVAPdfB's option string, again with
// fDefaultPDF as the last constructor argument.
554 fMVAPdfS =
new PDF(
TString(GetName())+
"_PDFSig", fMVAPdfB->GetOptions(),
"MVAPdfSig", fDefaultPDF );
555 fMVAPdfS->DeclareOptions();
556 fMVAPdfS->ParseOptions();
557 fMVAPdfS->ProcessOptions();
// The option string held by the last PDF in the chain becomes this method's
// option string.
560 SetOptions( fMVAPdfS->GetOptions() );
565 GetTransformationHandler(),
569 if (fDefaultPDF!= 0) {
delete fDefaultPDF; fDefaultPDF = 0; }
570 if (fMVAPdfS != 0) {
delete fMVAPdfS; fMVAPdfS = 0; }
571 if (fMVAPdfB != 0) {
delete fMVAPdfB; fMVAPdfB = 0; }
575 fVerbosityLevelString =
TString(
"Verbose");
576 Log().SetMinType( kVERBOSE );
// Translate the "VerbosityLevel" option string into the logger's minimum
// message type. "Default" leaves the logger untouched; any string other than
// the names listed here is reported as a fatal error.
578 else if (fVerbosityLevelString ==
"Debug" ) Log().SetMinType( kDEBUG );
579 else if (fVerbosityLevelString ==
"Verbose" ) Log().SetMinType( kVERBOSE );
580 else if (fVerbosityLevelString ==
"Info" ) Log().SetMinType( kINFO );
581 else if (fVerbosityLevelString ==
"Warning" ) Log().SetMinType( kWARNING );
582 else if (fVerbosityLevelString ==
"Error" ) Log().SetMinType( kERROR );
583 else if (fVerbosityLevelString ==
"Fatal" ) Log().SetMinType( kFATAL );
584 else if (fVerbosityLevelString !=
"Default" ) {
585 Log() << kFATAL <<
"<ProcessOptions> Verbosity level type '"
586 << fVerbosityLevelString <<
"' unknown." <<
Endl;
598 DeclareOptionRef( fNormalise=
kFALSE,
"Normalise",
"Normalise input variables" );
599 DeclareOptionRef( fUseDecorr=
kFALSE,
"D",
"Use-decorrelated-variables flag" );
600 DeclareOptionRef( fVariableTransformTypeString=
"Signal",
"VarTransformType",
601 "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
602 AddPreDefVal(
TString(
"Signal") );
603 AddPreDefVal(
TString(
"Background") );
604 DeclareOptionRef( fTxtWeightsOnly=
kTRUE,
"TxtWeightFilesOnly",
"If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
614 DeclareOptionRef( fNbinsMVAPdf = 60,
"NbinsMVAPdf",
"Number of bins used for the PDFs of classifier outputs" );
615 DeclareOptionRef( fNsmoothMVAPdf = 2,
"NsmoothMVAPdf",
"Number of smoothing iterations for classifier PDFs" );
629 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Parameter optimization is not yet implemented for method "
630 << GetName() <<
Endl;
631 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Currently we need to set hardcoded which parameter is tuned in which ranges"<<
Endl;
// Base-class fallback: nothing is tuned, so an empty map is returned.
// The size() call has no effect; its result is discarded (presumably it only
// exists to silence an unused-variable warning -- confirm).
633 std::map<TString,Double_t> tunedParameters;
634 tunedParameters.size();
635 return tunedParameters;
656 if (Help()) PrintHelpMessage();
659 if(!IsSilentFile()) BaseDir()->cd();
663 GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());
667 <<
"Begin training" <<
Endl;
668 Long64_t nEvents = Data()->GetNEvents();
669 Timer traintimer( nEvents, GetName(),
kTRUE );
672 <<
"\tEnd of training " <<
Endl;
675 <<
"Elapsed time for training with " << nEvents <<
" events: "
679 <<
"\tCreate MVA output for ";
682 if (DoMulticlass()) {
683 Log() <<
Form(
"[%s] : ",DataInfo().GetName())<<
"Multiclass classification on training sample" <<
Endl;
686 else if (!DoRegression()) {
688 Log() <<
Form(
"[%s] : ",DataInfo().GetName())<<
"classification on training sample" <<
Endl;
697 Log() <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"regression on training sample" <<
Endl;
701 Log() <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Create PDFs" <<
Endl;
708 if (fModelPersistence ) WriteStateToFile();
711 if ((!DoRegression()) && (fModelPersistence)) MakeClass();
718 WriteMonitoringHistosToFile();
726 if (!DoRegression()) Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Trying to use GetRegressionDeviation() with a classification job" <<
Endl;
727 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Create results for " << (
type==
Types::kTraining?
"training":
"testing") <<
Endl;
729 bool truncate =
false;
730 TH1F*
h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
735 TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
736 stddev90Percent = sqrt(h2->
GetMean());
746 Data()->SetCurrentType(
type);
748 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Create results for " << (
type==
Types::kTraining?
"training":
"testing") <<
Endl;
752 Long64_t nEvents = Data()->GetNEvents();
756 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"Evaluation of " << GetMethodName() <<
" on "
759 regRes->Resize( nEvents );
// Throttle progress-bar redraws to roughly totalProgressDraws (100) updates:
// with fewer than 100 events the step stays 1 (redraw on every event),
// otherwise the step is nEvents/100 (integer division).
764 Int_t totalProgressDraws = 100;
765 Int_t drawProgressEvery = 1;
766 if(nEvents >= totalProgressDraws) drawProgressEvery = nEvents/totalProgressDraws;
768 for (
Int_t ievt=0; ievt<nEvents; ievt++) {
770 Data()->SetCurrentEvent(ievt);
771 std::vector< Float_t > vals = GetRegressionValues();
772 regRes->SetValue( vals, ievt );
775 if(ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.
DrawProgressBar( ievt );
778 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())
779 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
786 TString histNamePrefix(GetTestvarName());
788 regRes->CreateDeviationHistograms( histNamePrefix );
796 Data()->SetCurrentType(
type);
798 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Create results for " << (
type==
Types::kTraining?
"training":
"testing") <<
Endl;
801 if (!resMulticlass) Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"unable to create pointer in AddMulticlassOutput, exiting."<<
Endl;
803 Long64_t nEvents = Data()->GetNEvents();
808 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Multiclass evaluation of " << GetMethodName() <<
" on "
811 resMulticlass->Resize( nEvents );
812 for (
Int_t ievt=0; ievt<nEvents; ievt++) {
813 Data()->SetCurrentEvent(ievt);
814 std::vector< Float_t > vals = GetMulticlassValues();
815 resMulticlass->SetValue( vals, ievt );
819 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())
820 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
827 TString histNamePrefix(GetTestvarName());
830 resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
831 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
838 if (errUpper) *errUpper=-1;
845 Double_t val = GetMvaValue(err, errUpper);
// Signal-like if the current MVA value passes the reference cut. Both sides are
// multiplied by the cut orientation, so a negative orientation reverses the
// direction of the comparison (value below the cut counts as signal-like).
855 return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ?
kTRUE :
kFALSE;
// Same test as the no-argument form, applied to the supplied mvaVal: both sides
// are scaled by the cut orientation, so a negative orientation reverses the
// direction of the comparison.
862 return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ?
kTRUE :
kFALSE;
870 Data()->SetCurrentType(
type);
875 Long64_t nEvents = Data()->GetNEvents();
880 std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents,
true);
887 for (
Int_t ievt = 0; ievt < nEvents; ievt++) {
890 auto ev = Data()->GetEvent(ievt);
891 clRes->
SetValue(mvaValues[ievt], ievt, DataInfo().IsSignal(ev));
// Normalise the requested event range [firstEvt, lastEvt) against the current
// sample size.
900 Long64_t nEvents = Data()->GetNEvents();
// An inverted range, or an upper bound past the end of the sample, falls back
// to "up to the last event".
901 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
// A negative lower bound is clamped to the first event.
// NOTE(review): firstEvt > nEvents is not guarded here; lastEvt-firstEvt would
// then be negative -- confirm callers never pass such a range.
902 if (firstEvt < 0) firstEvt = 0;
// One result slot per requested event.
903 std::vector<Double_t> values(lastEvt-firstEvt);
// From here on nEvents means the number of requested events, not the sample size.
905 nEvents = values.size();
911 Log() << kHEADER <<
Form(
"[%s] : ",DataInfo().GetName())
912 <<
"Evaluation of " << GetMethodName() <<
" on "
914 <<
" sample (" << nEvents <<
" events)" <<
Endl;
// Evaluate the classifier for every event in [firstEvt, lastEvt).
// `values` was sized to lastEvt-firstEvt entries earlier in this function, so
// the slot for event `ievt` is `ievt-firstEvt`. Indexing with the absolute
// event number wrote past the end of the vector whenever firstEvt > 0; for
// firstEvt == 0 the behaviour is unchanged.
916 for (
Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
917 Data()->SetCurrentEvent(ievt);
918 values[ievt-firstEvt] = GetMvaValue();
923 if (modulo <= 0 ) modulo = 1;
929 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
942 auto result = GetMvaValues(firstEvt, lastEvt, logProgress);
952 Data()->SetCurrentType(
type);
957 Long64_t nEvents = Data()->GetNEvents();
962 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"Evaluation of " << GetMethodName() <<
" on "
965 mvaProb->
Resize( nEvents );
966 for (
Int_t ievt=0; ievt<nEvents; ievt++) {
968 Data()->SetCurrentEvent(ievt);
970 if (proba < 0)
break;
971 mvaProb->
SetValue( proba, ievt, DataInfo().IsSignal( Data()->GetEvent()) );
975 if (modulo <= 0 ) modulo = 1;
979 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",DataInfo().GetName())
980 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
999 Data()->SetCurrentType(
type);
1001 bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
1003 Double_t m1 = 0, m2 = 0,
s1 = 0, s2 = 0, s12 = 0;
1004 const Int_t nevt = GetNEvents();
1009 Log() << kINFO <<
"Calculate regression for all events" <<
Endl;
1011 for (
Long64_t ievt=0; ievt<nevt; ievt++) {
1013 const Event* ev = Data()->GetEvent(ievt);
1016 Float_t r = GetRegressionValues()[0];
1035 m1 += t*w;
s1 += t*t*w;
1036 m2 +=
r*w; s2 +=
r*
r*w;
1040 if (ievt % modulo == 0)
1044 Log() << kINFO <<
"Elapsed time for evaluation of " << nevt <<
" events: "
// Correlation coefficient between the two accumulated quantities:
// covariance (s12/sumw - m1*m2) divided by the square root of the product of
// the two variances (s1/sumw - m1*m1) and (s2/sumw - m2*m2).
// NOTE(review): this form assumes m1 and m2 have already been divided by sumw
// (i.e. are weighted means, not raw sums); that normalisation is not visible in
// this excerpt -- confirm. A zero variance makes the divisor zero.
1056 corr = s12/sumw - m1*m2;
1057 corr /=
TMath::Sqrt( (
s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1068 for (
Long64_t ievt=0; ievt<nevt; ievt++) {
1070 hist->
Fill( rV[ievt], tV[ievt], wV[ievt] );
1071 if (
d >= devMin &&
d <= devMax) {
1073 biasT += wV[ievt] *
d;
1075 rmsT += wV[ievt] *
d *
d;
1076 histT->
Fill( rV[ievt], tV[ievt], wV[ievt] );
1094 Data()->SetCurrentType(savedType);
1104 if (!resMulticlass) Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"unable to create pointer in TestMulticlass, exiting."<<
Endl;
1113 TString histNamePrefix(GetTestvarName());
1114 TString histNamePrefixTest{histNamePrefix +
"_Test"};
1115 TString histNamePrefixTrain{histNamePrefix +
"_Train"};
1136 if (0==mvaRes && !(GetMethodTypeName().Contains(
"Cuts"))) {
1137 Log()<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"mvaRes " << mvaRes <<
" GetMethodTypeName " << GetMethodTypeName()
1138 <<
" contains " << !(GetMethodTypeName().Contains(
"Cuts")) <<
Endl;
1139 Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"<TestInit> Test variable " << GetTestvarName()
1140 <<
" not found in tree" <<
Endl;
1145 fMeanS, fMeanB, fRmsS, fRmsB, fXmin, fXmax, fSignalClass );
1153 fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;
1165 TestvarName=
Form(
"[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1168 TestvarName=GetTestvarName();
1170 TH1* mva_s =
new TH1D( TestvarName +
"_S",TestvarName +
"_S", fNbinsMVAoutput, fXmin, sxmax );
1171 TH1* mva_b =
new TH1D( TestvarName +
"_B",TestvarName +
"_B", fNbinsMVAoutput, fXmin, sxmax );
1172 mvaRes->
Store(mva_s,
"MVA_S");
1173 mvaRes->
Store(mva_b,
"MVA_B");
1183 proba_s =
new TH1D( TestvarName +
"_Proba_S", TestvarName +
"_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1184 proba_b =
new TH1D( TestvarName +
"_Proba_B", TestvarName +
"_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1185 mvaRes->
Store(proba_s,
"Prob_S");
1186 mvaRes->
Store(proba_b,
"Prob_B");
1191 rarity_s =
new TH1D( TestvarName +
"_Rarity_S", TestvarName +
"_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1192 rarity_b =
new TH1D( TestvarName +
"_Rarity_B", TestvarName +
"_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1193 mvaRes->
Store(rarity_s,
"Rar_S");
1194 mvaRes->
Store(rarity_b,
"Rar_B");
1200 TH1* mva_eff_s =
new TH1D( TestvarName +
"_S_high", TestvarName +
"_S_high", fNbinsH, fXmin, sxmax );
1201 TH1* mva_eff_b =
new TH1D( TestvarName +
"_B_high", TestvarName +
"_B_high", fNbinsH, fXmin, sxmax );
1202 mvaRes->
Store(mva_eff_s,
"MVA_HIGHBIN_S");
1203 mvaRes->
Store(mva_eff_b,
"MVA_HIGHBIN_B");
1212 Log() << kHEADER <<
Form(
"[%s] : ",DataInfo().GetName())<<
"Loop over test events and fill histograms with classifier response..." <<
Endl <<
Endl;
1213 if (mvaProb) Log() << kINFO <<
"Also filling probability and rarity histograms (on request)..." <<
Endl;
1217 if ( mvaRes->
GetSize() != GetNEvents() ) {
1218 Log() << kFATAL <<
TString::Format(
"Inconsistent result size %lld with number of events %u ", mvaRes->
GetSize() , GetNEvents() ) <<
Endl;
1219 assert(mvaRes->
GetSize() == GetNEvents());
1222 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1224 const Event* ev = GetEvent(ievt);
1228 if (DataInfo().IsSignal(ev)) {
1230 mva_s ->
Fill(
v, w );
1232 proba_s->
Fill( (*mvaProb)[ievt][0], w );
1233 rarity_s->
Fill( GetRarity(
v ), w );
1236 mva_eff_s ->
Fill(
v, w );
1240 mva_b ->
Fill(
v, w );
1242 proba_b->
Fill( (*mvaProb)[ievt][0], w );
1243 rarity_b->
Fill( GetRarity(
v ), w );
1245 mva_eff_b ->
Fill(
v, w );
1260 if (fSplS) {
delete fSplS; fSplS = 0; }
1261 if (fSplB) {
delete fSplB; fSplB = 0; }
1275 tf << prefix <<
"#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1276 tf << prefix <<
"Method : " << GetMethodTypeName() <<
"::" << GetMethodName() << std::endl;
1277 tf.setf(std::ios::left);
1278 tf << prefix <<
"TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() <<
" ["
1279 << GetTrainingTMVAVersionCode() <<
"]" << std::endl;
1280 tf << prefix <<
"ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() <<
" ["
1281 << GetTrainingROOTVersionCode() <<
"]" << std::endl;
1282 tf << prefix <<
"Creator : " << userInfo->
fUser << std::endl;
1286 tf << prefix <<
"Training events: " << Data()->GetNTrainingEvents() << std::endl;
1290 tf << prefix <<
"Analysis type : " <<
"[" << ((GetAnalysisType()==
Types::kRegression) ?
"Regression" :
"Classification") <<
"]" << std::endl;
1291 tf << prefix << std::endl;
1296 tf << prefix << std::endl << prefix <<
"#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1297 WriteOptionsToStream( tf, prefix );
1298 tf << prefix << std::endl;
1301 tf << prefix << std::endl << prefix <<
"#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1302 WriteVarsToStream( tf, prefix );
1303 tf << prefix << std::endl;
1320 AddRegressionOutput(
type );
1322 AddMulticlassOutput(
type );
1324 AddClassifierOutput(
type );
1326 AddClassifierOutputProb(
type );
1336 if (!parent)
return;
1341 AddInfoItem( gi,
"TMVA Release", GetTrainingTMVAVersionString() +
" [" +
gTools().StringFromInt(GetTrainingTMVAVersionCode()) +
"]" );
1342 AddInfoItem( gi,
"ROOT Release", GetTrainingROOTVersionString() +
" [" +
gTools().StringFromInt(GetTrainingROOTVersionCode()) +
"]");
1343 AddInfoItem( gi,
"Creator", userInfo->
fUser);
1347 AddInfoItem( gi,
"Training events",
gTools().StringFromInt(Data()->GetNTrainingEvents()));
1348 AddInfoItem( gi,
"TrainingTime",
gTools().StringFromDouble(
const_cast<TMVA::MethodBase*
>(
this)->GetTrainTime()));
1353 AddInfoItem( gi,
"AnalysisType", analysisType );
1357 AddOptionsXMLTo( parent );
1360 AddVarsXMLTo( parent );
1363 if (fModelPersistence)
1364 AddSpectatorsXMLTo( parent );
1367 AddClassesXMLTo(parent);
1370 if (DoRegression()) AddTargetsXMLTo(parent);
1373 GetTransformationHandler(
false).AddXMLTo( parent );
1377 if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1378 if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1381 AddWeightsXMLTo( parent );
1397 ReadWeightsFromStream( rf );
1410 TString tfname( GetWeightFileName() );
1415 <<
"Creating xml weight file: "
1420 gTools().
AddAttr(rootnode,
"Method", GetMethodTypeName() +
"::" + GetMethodName());
1421 WriteStateToXML(rootnode);
1433 TString tfname(GetWeightFileName());
1436 <<
"Reading weight file: "
1442 Log() << kFATAL <<
"Error parsing XML file " << tfname <<
Endl;
1445 ReadStateFromXML(rootnode);
1450 fb.open(tfname.
Data(),std::ios::in);
1451 if (!fb.is_open()) {
1452 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<ReadStateFromFile> "
1453 <<
"Unable to open input weight file: " << tfname <<
Endl;
1455 std::istream fin(&fb);
1456 ReadStateFromStream(fin);
1459 if (!fTxtWeightsOnly) {
1462 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Reading root weight file: "
1465 ReadStateFromStream( *rfile );
1475 ReadStateFromXML(rootnode);
1489 fMethodName = fullMethodName(fullMethodName.
Index(
"::")+2,fullMethodName.
Length());
1492 Log().SetSource( GetName() );
1494 <<
"Read method \"" << GetMethodName() <<
"\" of type \"" << GetMethodTypeName() <<
"\"" <<
Endl;
1504 if (nodeName==
"GeneralInfo") {
1509 while (antypeNode) {
1512 if (
name ==
"TrainingTime")
1515 if (
name ==
"AnalysisType") {
1521 else Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Analysis type " << val <<
" is not known." <<
Endl;
1524 if (
name ==
"TMVA Release" ||
name ==
"TMVA") {
1528 Log() << kDEBUG <<
Form(
"[%s] : ",DataInfo().GetName()) <<
"MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() <<
Endl;
1531 if (
name ==
"ROOT Release" ||
name ==
"ROOT") {
1536 <<
"MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() <<
Endl;
1541 else if (nodeName==
"Options") {
1542 ReadOptionsFromXML(ch);
1546 else if (nodeName==
"Variables") {
1547 ReadVariablesFromXML(ch);
1549 else if (nodeName==
"Spectators") {
1550 ReadSpectatorsFromXML(ch);
1552 else if (nodeName==
"Classes") {
1553 if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1555 else if (nodeName==
"Targets") {
1556 if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1558 else if (nodeName==
"Transformations") {
1559 GetTransformationHandler().ReadFromXML(ch);
1561 else if (nodeName==
"MVAPdfs") {
1563 if (fMVAPdfS) {
delete fMVAPdfS; fMVAPdfS=0; }
1564 if (fMVAPdfB) {
delete fMVAPdfB; fMVAPdfB=0; }
1568 fMVAPdfS =
new PDF(pdfname);
1569 fMVAPdfS->ReadXML(pdfnode);
1572 fMVAPdfB =
new PDF(pdfname);
1573 fMVAPdfB->ReadXML(pdfnode);
1576 else if (nodeName==
"Weights") {
1577 ReadWeightsFromXML(ch);
1580 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Unparsed XML node: '" << nodeName <<
"'" <<
Endl;
1587 if (GetTransformationHandler().GetCallerName() ==
"") GetTransformationHandler().SetCallerName( GetName() );
1603 while (!
TString(buf).BeginsWith(
"Method")) GetLine(fin,buf);
1607 methodType = methodType(methodType.
Last(
' '),methodType.
Length());
1612 if (methodName ==
"") methodName = methodType;
1613 fMethodName = methodName;
1615 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Read method \"" << GetMethodName() <<
"\" of type \"" << GetMethodTypeName() <<
"\"" <<
Endl;
1618 Log().SetSource( GetName() );
1632 while (!
TString(buf).BeginsWith(
"#OPT")) GetLine(fin,buf);
1633 ReadOptionsFromStream(fin);
1637 fin.getline(buf,512);
1638 while (!
TString(buf).BeginsWith(
"#VAR")) fin.getline(buf,512);
1639 ReadVarsFromStream(fin);
1644 if (IsNormalised()) {
1650 if ( fVarTransformString ==
"None") {
1653 }
else if ( fVarTransformString ==
"Decorrelate" ) {
1655 }
else if ( fVarTransformString ==
"PCA" ) {
1656 varTrafo = GetTransformationHandler().AddTransformation(
new VariablePCATransform(DataInfo()), -1 );
1657 }
else if ( fVarTransformString ==
"Uniform" ) {
1658 varTrafo = GetTransformationHandler().AddTransformation(
new VariableGaussTransform(DataInfo(),
"Uniform"), -1 );
1659 }
else if ( fVarTransformString ==
"Gauss" ) {
1661 }
else if ( fVarTransformString ==
"GaussDecorr" ) {
1665 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<ProcessOptions> Variable transform '"
1666 << fVarTransformString <<
"' unknown." <<
Endl;
1669 if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1670 fin.getline(buf,512);
1671 while (!
TString(buf).BeginsWith(
"#MAT")) fin.getline(buf,512);
1674 varTrafo->ReadTransformationFromStream(fin, trafo );
1685 fin.getline(buf,512);
1686 while (!
TString(buf).BeginsWith(
"#MVAPDFS")) fin.getline(buf,512);
1687 if (fMVAPdfS != 0) {
delete fMVAPdfS; fMVAPdfS = 0; }
1688 if (fMVAPdfB != 0) {
delete fMVAPdfB; fMVAPdfB = 0; }
1689 fMVAPdfS =
new PDF(
TString(GetName()) +
" MVA PDF Sig");
1690 fMVAPdfB =
new PDF(
TString(GetName()) +
" MVA PDF Bkg");
1691 fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
1692 fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
1699 fin.getline(buf,512);
1700 while (!
TString(buf).BeginsWith(
"#WGT")) fin.getline(buf,512);
1701 fin.getline(buf,512);
1702 ReadWeightsFromStream( fin );;
1705 if (GetTransformationHandler().GetCallerName() ==
"") GetTransformationHandler().SetCallerName( GetName() );
1715 o << prefix <<
"NVar " << DataInfo().GetNVariables() << std::endl;
1716 std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1717 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1718 o << prefix <<
"NSpec " << DataInfo().GetNSpectators() << std::endl;
1719 varIt = DataInfo().GetSpectatorInfos().begin();
1720 for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1732 istr >> dummy >> readNVar;
1734 if (readNVar!=DataInfo().GetNVariables()) {
1735 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"You declared "<< DataInfo().GetNVariables() <<
" variables in the Reader"
1736 <<
" while there are " << readNVar <<
" variables declared in the file"
1742 std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1744 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
// Report a variable mismatch between the weight file and the Reader.
// The explanatory lines are logged at kINFO; the final kFATAL line ends the
// report. The second explanatory line used to start with a duplicated "is"
// ("...input file is" / "is not the same..."); it now matches the wording used
// by the XML reader's equivalent message.
1751 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"ERROR in <ReadVarsFromStream>" <<
Endl;
1752 Log() << kINFO <<
"The definition (or the order) of the variables found in the input file is" <<
Endl;
1753 Log() << kINFO <<
"not the same as the one declared in the Reader (which is necessary for" <<
Endl;
1754 Log() << kINFO <<
"the correct working of the method):" <<
Endl;
// Show both expressions for the offending variable index.
1755 Log() << kINFO <<
" var #" << varIdx <<
" declared in Reader: " << varIt->GetExpression() <<
Endl;
1756 Log() << kINFO <<
" var #" << varIdx <<
" declared in file : " << varInfo.
GetExpression() <<
Endl;
1757 Log() << kFATAL <<
"The expression declared to the Reader needs to be checked (name or order are wrong)" <<
Endl;
1770 for (
UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1786 for (
UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1788 VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1806 UInt_t nClasses=DataInfo().GetNClasses();
1811 for (
UInt_t iCls=0; iCls<nClasses; ++iCls) {
1812 ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1829 for (
UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1845 if (readNVar!=DataInfo().GetNVariables()) {
1846 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"You declared "<< DataInfo().GetNVariables() <<
" variables in the Reader"
1847 <<
" while there are " << readNVar <<
" variables declared in the file"
1857 existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1862 existingVarInfo = readVarInfo;
1865 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"ERROR in <ReadVariablesFromXML>" <<
Endl;
1866 Log() << kINFO <<
"The definition (or the order) of the variables found in the input file is" <<
Endl;
1867 Log() << kINFO <<
"not the same as the one declared in the Reader (which is necessary for the" <<
Endl;
1868 Log() << kINFO <<
"correct working of the method):" <<
Endl;
1869 Log() << kINFO <<
" var #" << varIdx <<
" declared in Reader: " << existingVarInfo.
GetExpression() <<
Endl;
1870 Log() << kINFO <<
" var #" << varIdx <<
" declared in file : " << readVarInfo.
GetExpression() <<
Endl;
1871 Log() << kFATAL <<
"The expression declared to the Reader needs to be checked (name or order are wrong)" <<
Endl;
1885 if (readNSpec!=DataInfo().GetNSpectators(
kFALSE)) {
1886 Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"You declared "<< DataInfo().GetNSpectators(
kFALSE) <<
" spectators in the Reader"
1887 <<
" while there are " << readNSpec <<
" spectators declared in the file"
1897 existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1902 existingSpecInfo = readSpecInfo;
1905 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"ERROR in <ReadSpectatorsFromXML>" <<
Endl;
1906 Log() << kINFO <<
"The definition (or the order) of the spectators found in the input file is" <<
Endl;
1907 Log() << kINFO <<
"not the same as the one declared in the Reader (which is necessary for the" <<
Endl;
1908 Log() << kINFO <<
"correct working of the method):" <<
Endl;
1909 Log() << kINFO <<
" spec #" << specIdx <<
" declared in Reader: " << existingSpecInfo.
GetExpression() <<
Endl;
1910 Log() << kINFO <<
" spec #" << specIdx <<
" declared in file : " << readSpecInfo.
GetExpression() <<
Endl;
1911 Log() << kFATAL <<
"The expression declared to the Reader needs to be checked (name or order are wrong)" <<
Endl;
1930 for (
UInt_t icls = 0; icls<readNCls;++icls) {
1932 DataInfo().AddClass(classname);
1940 DataInfo().AddClass(className);
1947 if (DataInfo().GetClassInfo(
"Signal") != 0) {
1948 fSignalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
1952 if (DataInfo().GetClassInfo(
"Background") != 0) {
1953 fBackgroundClass = DataInfo().GetClassInfo(
"Background")->GetNumber();
1973 DataInfo().AddTarget(expression,
"",
"",0,0);
1985 if (fBaseDir != 0)
return fBaseDir;
1986 Log()<<kDEBUG<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
" Base Directory for " << GetMethodName() <<
" not set yet --> check if already there.." <<
Endl;
1988 if (IsSilentFile()) {
1989 Log() << kFATAL <<
Form(
"Dataset[%s] : ", DataInfo().GetName())
1990 <<
"MethodBase::BaseDir() - No directory exists when running a Method without output file. Enable the "
1991 "output when creating the factory"
1997 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" <<
Endl;
1999 TString defaultDir = GetMethodName();
2003 Log()<<kDEBUG<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
" Base Directory for " << GetMethodTypeName() <<
" does not exist yet--> created it" <<
Endl;
2004 sdir = methodDir->
mkdir(defaultDir);
2007 if (fModelPersistence) {
2010 wfilePath.
Write(
"TrainingPath" );
2011 wfileName.
Write(
"WeightFileName" );
2015 Log()<<kDEBUG<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
" Base Directory for " << GetMethodTypeName() <<
" existed, return it.." <<
Endl;
2025 if (fMethodBaseDir != 0) {
2026 return fMethodBaseDir;
2029 const char *datasetName = DataInfo().
GetName();
2031 Log() << kDEBUG <<
Form(
"Dataset[%s] : ", datasetName) <<
" Base Directory for " << GetMethodTypeName()
2032 <<
" not set yet --> check if already there.." <<
Endl;
2035 if (!factoryBaseDir)
return nullptr;
2036 fMethodBaseDir = factoryBaseDir->
GetDirectory(datasetName);
2037 if (!fMethodBaseDir) {
2038 fMethodBaseDir = factoryBaseDir->
mkdir(datasetName,
Form(
"Base directory for dataset %s", datasetName));
2039 if (!fMethodBaseDir) {
2040 Log() << kFATAL <<
"Can not create dir " << datasetName;
2043 TString methodTypeDir =
Form(
"Method_%s", GetMethodTypeName().Data());
2044 fMethodBaseDir = fMethodBaseDir->GetDirectory(methodTypeDir.
Data());
2046 if (!fMethodBaseDir) {
2048 TString methodTypeDirHelpStr =
Form(
"Directory for all %s methods", GetMethodTypeName().Data());
2049 fMethodBaseDir = datasetDir->
mkdir(methodTypeDir.
Data(), methodTypeDirHelpStr);
2050 Log() << kDEBUG <<
Form(
"Dataset[%s] : ", datasetName) <<
" Base Directory for " << GetMethodName()
2051 <<
" does not exist yet--> created it" <<
Endl;
2054 Log() << kDEBUG <<
Form(
"Dataset[%s] : ", datasetName)
2055 <<
"Return from MethodBaseDir() after creating base directory " <<
Endl;
2056 return fMethodBaseDir;
2073 fWeightFile = theWeightFile;
2081 if (fWeightFile!=
"")
return fWeightFile;
2086 TString wFileDir(GetWeightFileDir());
2087 TString wFileName = GetJobName() +
"_" + GetMethodName() +
2089 if (wFileDir.
IsNull() )
return wFileName;
2091 return ( wFileDir + (wFileDir[wFileDir.
Length()-1]==
'/' ?
"" :
"/")
2103 if (0 != fMVAPdfS) {
2104 fMVAPdfS->GetOriginalHist()->Write();
2105 fMVAPdfS->GetSmoothedHist()->Write();
2106 fMVAPdfS->GetPDFHist()->Write();
2108 if (0 != fMVAPdfB) {
2109 fMVAPdfB->GetOriginalHist()->Write();
2110 fMVAPdfB->GetSmoothedHist()->Write();
2111 fMVAPdfB->GetPDFHist()->Write();
2117 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<WriteEvaluationHistosToFile> Unknown result: "
2118 << GetMethodName() << (treetype==
Types::kTraining?
"/kTraining":
"/kTesting")
2119 <<
"/kMaxAnalysisType" <<
Endl;
2124 GetTransformationHandler().PlotVariables (GetEventCollection(
Types::kTesting ), BaseDir() );
2126 Log() << kINFO <<
TString::Format(
"Dataset[%s] : ",DataInfo().GetName())
2127 <<
" variable plots are not produces ! The number of variables is " << DataInfo().GetNVariables()
2147 fin.getline(buf,512);
2149 if (
line.BeginsWith(
"TMVA Release")) {
2153 std::stringstream s(code.
Data());
2154 s >> fTMVATrainingVersion;
2155 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() <<
Endl;
2157 if (
line.BeginsWith(
"ROOT Release")) {
2161 std::stringstream s(code.
Data());
2162 s >> fROOTTrainingVersion;
2163 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() <<
Endl;
2165 if (
line.BeginsWith(
"Analysis type")) {
2169 std::stringstream s(code.
Data());
2170 std::string analysisType;
2172 if (analysisType ==
"regression" || analysisType ==
"Regression") SetAnalysisType(
Types::kRegression );
2173 else if (analysisType ==
"classification" || analysisType ==
"Classification") SetAnalysisType(
Types::kClassification );
2174 else if (analysisType ==
"multiclass" || analysisType ==
"Multiclass") SetAnalysisType(
Types::kMulticlass );
2175 else Log() << kFATAL <<
"Analysis type " << analysisType <<
" from weight-file not known!" << std::endl;
2177 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Method was trained for "
2197 if (mvaRes==0 || mvaRes->
GetSize()==0) {
2198 Log() << kERROR<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<CreateMVAPdfs> No result of classifier testing available" <<
Endl;
2205 TH1* histMVAPdfS =
new TH1D( GetMethodTypeName() +
"_tr_S", GetMethodTypeName() +
"_tr_S",
2206 fMVAPdfS->GetHistNBins( mvaRes->
GetSize() ), minVal, maxVal );
2207 TH1* histMVAPdfB =
new TH1D( GetMethodTypeName() +
"_tr_B", GetMethodTypeName() +
"_tr_B",
2208 fMVAPdfB->GetHistNBins( mvaRes->
GetSize() ), minVal, maxVal );
2212 histMVAPdfS->
Sumw2();
2213 histMVAPdfB->
Sumw2();
2218 Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2220 if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->
Fill( theVal, theWeight );
2221 else histMVAPdfB->
Fill( theVal, theWeight );
2230 histMVAPdfS->
Write();
2231 histMVAPdfB->
Write();
2234 fMVAPdfS->BuildPDF ( histMVAPdfS );
2235 fMVAPdfB->BuildPDF ( histMVAPdfB );
2236 fMVAPdfS->ValidatePDF( histMVAPdfS );
2237 fMVAPdfB->ValidatePDF( histMVAPdfB );
2239 if (DataInfo().GetNClasses() == 2) {
2240 Log() << kINFO<<
Form(
"Dataset[%s] : ",DataInfo().GetName())
2241 <<
Form(
"<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2242 GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2254 if (!fMVAPdfS || !fMVAPdfB) {
2255 Log() << kINFO<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" <<
Endl;
2258 Double_t sigFraction = DataInfo().GetTrainingSumSignalWeights() / (DataInfo().GetTrainingSumSignalWeights() + DataInfo().GetTrainingSumBackgrWeights() );
2261 return GetProba(mvaVal,sigFraction);
2269 if (!fMVAPdfS || !fMVAPdfB) {
2270 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetProba> MVA PDFs for Signal and Background don't exist" <<
Endl;
2273 Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2274 Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2276 Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2278 return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2291 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2292 <<
"select option \"CreateMVAPdfs\"" <<
Endl;
2307 Data()->SetCurrentType(
type);
2317 else if (list->
GetSize() > 2) {
2318 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetEfficiency> Wrong number of arguments"
2319 <<
" in string: " << theString
2320 <<
" | required format, e.g., Efficiency:0.05, or empty string" <<
Endl;
2328 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetEfficiency> Binning mismatch between signal and background histos" <<
Endl;
2336 TH1 * effhist = results->
GetHist(
"MVA_HIGHBIN_S");
2343 if (results->
DoesExist(
"MVA_EFF_S")==0) {
2346 TH1* eff_s =
new TH1D( GetTestvarName() +
"_effS", GetTestvarName() +
" (signal)", fNbinsH,
xmin,
xmax );
2347 TH1* eff_b =
new TH1D( GetTestvarName() +
"_effB", GetTestvarName() +
" (background)", fNbinsH,
xmin,
xmax );
2348 results->
Store(eff_s,
"MVA_EFF_S");
2349 results->
Store(eff_b,
"MVA_EFF_B");
2352 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2356 for (
UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2359 Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2360 Float_t theWeight = GetEvent(ievt)->GetWeight();
2361 Float_t theVal = (*mvaRes)[ievt];
2364 TH1* theHist = isSignal ? eff_s : eff_b;
2367 if (isSignal) nevtS+=theWeight;
2371 if (sign > 0 && maxbin > fNbinsH)
continue;
2372 if (sign < 0 && maxbin < 1 )
continue;
2373 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2374 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2379 for (
Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->
AddBinContent( ibin , theWeight );
2381 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetEfficiency> Mismatch in sign" <<
Endl;
2392 TH1* eff_BvsS =
new TH1D( GetTestvarName() +
"_effBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
2393 results->
Store(eff_BvsS,
"MVA_EFF_BvsS");
2398 TH1* rej_BvsS =
new TH1D( GetTestvarName() +
"_rejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
2399 results->
Store(rej_BvsS);
2401 rej_BvsS->
SetYTitle(
"Backgr rejection (1-eff)" );
2404 TH1* inveff_BvsS =
new TH1D( GetTestvarName() +
"_invBeffvsSeff",
2405 GetTestvarName(), fNbins, 0, 1 );
2406 results->
Store(inveff_BvsS);
2408 inveff_BvsS->
SetYTitle(
"Inverse backgr. eff (1/eff)" );
2414 fSplRefS =
new TSpline1(
"spline2_signal",
new TGraph( eff_s ) );
2415 fSplRefB =
new TSpline1(
"spline2_background",
new TGraph( eff_b ) );
2429 for (
Int_t bini=1; bini<=fNbins; bini++) {
2442 if (effB>std::numeric_limits<double>::epsilon())
2451 Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2452 Int_t nbins_ = 5000;
2453 for (
Int_t bini=1; bini<=nbins_; bini++) {
2456 effS = (bini - 0.5)/
Float_t(nbins_);
2457 rejB = 1.0 - fSpleffBvsS->Eval( effS );
2460 if ((effS - rejB)*(effS_ - rejB_) < 0)
break;
2467 SetSignalReferenceCut( cut );
2472 if (0 == fSpleffBvsS) {
2478 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2479 Int_t nbins_ = 1000;
2485 for (
Int_t bini=1; bini<=nbins_; bini++) {
2488 effS = (bini - 0.5)/
Float_t(nbins_);
2489 effB = fSpleffBvsS->Eval( effS );
2490 integral += (1.0 - effB);
2504 for (
Int_t bini=1; bini<=nbins_; bini++) {
2507 effS = (bini - 0.5)/
Float_t(nbins_);
2508 effB = fSpleffBvsS->Eval( effS );
2511 if ((effB - effBref)*(effB_ - effBref) <= 0)
break;
2517 effS = 0.5*(effS + effS_);
2520 if (nevtS > 0) effSerr =
TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2545 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetTrainingEfficiency> Wrong number of arguments"
2546 <<
" in string: " << theString
2547 <<
" | required format, e.g., Efficiency:0.05" <<
Endl;
2560 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2568 TH1 * effhist = results->
GetHist(
"MVA_HIGHBIN_S");
2573 if (results->
DoesExist(
"MVA_TRAIN_S")==0) {
2579 TH1* mva_s_tr =
new TH1D( GetTestvarName() +
"_Train_S",GetTestvarName() +
"_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2580 TH1* mva_b_tr =
new TH1D( GetTestvarName() +
"_Train_B",GetTestvarName() +
"_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2581 results->
Store(mva_s_tr,
"MVA_TRAIN_S");
2582 results->
Store(mva_b_tr,
"MVA_TRAIN_B");
2587 TH1* mva_eff_tr_s =
new TH1D( GetTestvarName() +
"_trainingEffS", GetTestvarName() +
" (signal)",
2589 TH1* mva_eff_tr_b =
new TH1D( GetTestvarName() +
"_trainingEffB", GetTestvarName() +
" (background)",
2591 results->
Store(mva_eff_tr_s,
"MVA_TRAINEFF_S");
2592 results->
Store(mva_eff_tr_b,
"MVA_TRAINEFF_B");
2595 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2597 std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2598 assert( (
Long64_t) mvaValues.size() == Data()->GetNEvents());
2601 for (
Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2603 Data()->SetCurrentEvent(ievt);
2604 const Event* ev = GetEvent();
2609 TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2610 TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2612 theClsHist->
Fill( theVal, theWeight );
2616 if (sign > 0 && maxbin > fNbinsH)
continue;
2617 if (sign < 0 && maxbin < 1 )
continue;
2618 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2619 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2621 if (sign > 0)
for (
Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->
AddBinContent( ibin , theWeight );
2622 else for (
Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->
AddBinContent( ibin , theWeight );
2635 TH1* eff_bvss =
new TH1D( GetTestvarName() +
"_trainingEffBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
2637 TH1* rej_bvss =
new TH1D( GetTestvarName() +
"_trainingRejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
2638 results->
Store(eff_bvss,
"EFF_BVSS_TR");
2639 results->
Store(rej_bvss,
"REJ_BVSS_TR");
2645 if (fSplTrainRefS)
delete fSplTrainRefS;
2646 if (fSplTrainRefB)
delete fSplTrainRefB;
2647 fSplTrainRefS =
new TSpline1(
"spline2_signal",
new TGraph( mva_eff_tr_s ) );
2648 fSplTrainRefB =
new TSpline1(
"spline2_background",
new TGraph( mva_eff_tr_b ) );
2661 fEffS = results->
GetHist(
"MVA_TRAINEFF_S");
2662 for (
Int_t bini=1; bini<=fNbins; bini++) {
2680 fSplTrainEffBvsS =
new TSpline1(
"effBvsS",
new TGraph( eff_bvss ) );
2684 if (0 == fSplTrainEffBvsS)
return 0.0;
2687 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2688 Int_t nbins_ = 1000;
2689 for (
Int_t bini=1; bini<=nbins_; bini++) {
2692 effS = (bini - 0.5)/
Float_t(nbins_);
2693 effB = fSplTrainEffBvsS->Eval( effS );
2696 if ((effB - effBref)*(effB_ - effBref) <= 0)
break;
2701 return 0.5*(effS + effS_);
2710 if (!resMulticlass) Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"unable to create pointer in GetMulticlassEfficiency, exiting."<<
Endl;
2722 if (!resMulticlass) Log() << kFATAL<<
"unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<
Endl;
2724 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Determine optimal multiclass cuts for training data..." <<
Endl;
2725 for (
UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2756 Log() << kFATAL <<
"Cannot get confusion matrix for non-multiclass analysis." << std::endl;
2760 Data()->SetCurrentType(
type);
2764 if (resMulticlass ==
nullptr) {
2765 Log() << kFATAL <<
Form(
"Dataset[%s] : ", DataInfo().GetName())
2766 <<
"unable to create pointer in GetMulticlassEfficiency, exiting." <<
Endl;
2781 Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2783 return (rms > 0) ?
TMath::Abs(fMeanS - fMeanB)/rms : 0;
2807 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2808 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetSeparation> Mismatch in pdfs" <<
Endl;
2809 if (!pdfS) pdfS = fSplS;
2810 if (!pdfB) pdfB = fSplB;
2812 if (!fSplS || !fSplB) {
2813 Log()<<kDEBUG<<
Form(
"[%s] : ",DataInfo().GetName())<<
"could not calculate the separation, distributions"
2814 <<
" fSplS or fSplB are not yet filled" <<
Endl;
2829 if ((!histS && histB) || (histS && !histB))
2830 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" <<
Endl;
2832 if (histS==0 || histB==0)
return 0.;
2845 for (
UInt_t i=0; i<nsteps; i++) {
2851 return integral*step;
2863 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2864 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetSeparation> Mismatch in pdfs" <<
Endl;
2865 if (!pdfS) pdfS = fSplS;
2866 if (!pdfB) pdfB = fSplB;
2868 if (pdfS==0 || pdfB==0)
return 0.;
2877 for (
UInt_t i=0; i<nsteps; i++) {
2881 return integral*step;
2891 Double_t& max_significance_value )
const
2896 Double_t effS(0),effB(0),significance(0);
2897 TH1D *temp_histogram =
new TH1D(
"temp",
"temp", fNbinsH, fXmin, fXmax );
2899 if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2900 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetMaximumSignificance> "
2901 <<
"Number of signal or background events is <= 0 ==> abort"
2905 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Using ratio SignalEvents/BackgroundEvents = "
2906 << SignalEvents/BackgroundEvents <<
Endl;
2911 if ( (eff_s==0) || (eff_b==0) ) {
2912 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Efficiency histograms empty !" <<
Endl;
2913 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"no maximum cut found, return 0" <<
Endl;
2917 for (
Int_t bin=1; bin<=fNbinsH; bin++) {
2922 significance = sqrt(SignalEvents)*( effS )/sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
2932 delete temp_histogram;
2934 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Optimal cut at : " << max_significance <<
Endl;
2935 Log() << kINFO<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"Maximum significance: " << max_significance_value <<
Endl;
2937 return max_significance;
2951 Data()->SetCurrentType(treeType);
2953 Long64_t entries = Data()->GetNEvents();
2957 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<CalculateEstimator> Wrong tree type: " << treeType <<
Endl;
2960 UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
2976 for (
Int_t ievt = 0; ievt < entries; ievt++) {
2978 const Event* ev = GetEvent(ievt);
2983 if (DataInfo().IsSignal(ev)) {
2985 meanS += weight*theVar;
2986 rmsS += weight*theVar*theVar;
2990 meanB += weight*theVar;
2991 rmsB += weight*theVar*theVar;
2999 meanS = meanS/sumwS;
3000 meanB = meanB/sumwB;
3004 Data()->SetCurrentType(previousTreeType);
3014 if (theClassFileName ==
"")
3015 classFileName = GetWeightFileDir() +
"/" + GetJobName() +
"_" + GetMethodName() +
".class.C";
3017 classFileName = theClassFileName;
3021 TString tfname( classFileName );
3023 <<
"Creating standalone class: "
3026 std::ofstream fout( classFileName );
3028 Log() << kFATAL <<
"<MakeClass> Unable to open file: " << classFileName <<
Endl;
3033 fout <<
"// Class: " << className << std::endl;
3034 fout <<
"// Automatically generated by MethodBase::MakeClass" << std::endl <<
"//" << std::endl;
3038 fout <<
"/* configuration options =====================================================" << std::endl << std::endl;
3039 WriteStateToStream( fout );
3041 fout <<
"============================================================================ */" << std::endl;
3044 fout <<
"" << std::endl;
3045 fout <<
"#include <array>" << std::endl;
3046 fout <<
"#include <vector>" << std::endl;
3047 fout <<
"#include <cmath>" << std::endl;
3048 fout <<
"#include <string>" << std::endl;
3049 fout <<
"#include <iostream>" << std::endl;
3050 fout <<
"" << std::endl;
3053 this->MakeClassSpecificHeader( fout, className );
3055 fout <<
"#ifndef IClassifierReader__def" << std::endl;
3056 fout <<
"#define IClassifierReader__def" << std::endl;
3058 fout <<
"class IClassifierReader {" << std::endl;
3060 fout <<
" public:" << std::endl;
3062 fout <<
" // constructor" << std::endl;
3063 fout <<
" IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
3064 fout <<
" virtual ~IClassifierReader() {}" << std::endl;
3066 fout <<
" // return classifier response" << std::endl;
3068 fout <<
" virtual std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3070 fout <<
" virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3073 fout <<
" // returns classifier status" << std::endl;
3074 fout <<
" bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3076 fout <<
" protected:" << std::endl;
3078 fout <<
" bool fStatusIsClean;" << std::endl;
3079 fout <<
"};" << std::endl;
3081 fout <<
"#endif" << std::endl;
3083 fout <<
"class " << className <<
" : public IClassifierReader {" << std::endl;
3085 fout <<
" public:" << std::endl;
3087 fout <<
" // constructor" << std::endl;
3088 fout <<
" " << className <<
"( std::vector<std::string>& theInputVars )" << std::endl;
3089 fout <<
" : IClassifierReader()," << std::endl;
3090 fout <<
" fClassName( \"" << className <<
"\" )," << std::endl;
3091 fout <<
" fNvars( " << GetNvar() <<
" )" << std::endl;
3092 fout <<
" {" << std::endl;
3093 fout <<
" // the training input variables" << std::endl;
3094 fout <<
" const char* inputVars[] = { ";
3095 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3096 fout <<
"\"" << GetOriginalVarName(ivar) <<
"\"";
3097 if (ivar<GetNvar()-1) fout <<
", ";
3099 fout <<
" };" << std::endl;
3101 fout <<
" // sanity checks" << std::endl;
3102 fout <<
" if (theInputVars.size() <= 0) {" << std::endl;
3103 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3104 fout <<
" fStatusIsClean = false;" << std::endl;
3105 fout <<
" }" << std::endl;
3107 fout <<
" if (theInputVars.size() != fNvars) {" << std::endl;
3108 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3109 fout <<
" << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3110 fout <<
" fStatusIsClean = false;" << std::endl;
3111 fout <<
" }" << std::endl;
3113 fout <<
" // validate input variables" << std::endl;
3114 fout <<
" for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3115 fout <<
" if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3116 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3117 fout <<
" << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3118 fout <<
" fStatusIsClean = false;" << std::endl;
3119 fout <<
" }" << std::endl;
3120 fout <<
" }" << std::endl;
3122 fout <<
" // initialize min and max vectors (for normalisation)" << std::endl;
3123 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3124 fout <<
" fVmin[" << ivar <<
"] = " << std::setprecision(15) << GetXmin( ivar ) <<
";" << std::endl;
3125 fout <<
" fVmax[" << ivar <<
"] = " << std::setprecision(15) << GetXmax( ivar ) <<
";" << std::endl;
3128 fout <<
" // initialize input variable types" << std::endl;
3129 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3130 fout <<
" fType[" << ivar <<
"] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() <<
"\';" << std::endl;
3133 fout <<
" // initialize constants" << std::endl;
3134 fout <<
" Initialize();" << std::endl;
3136 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3137 fout <<
" // initialize transformation" << std::endl;
3138 fout <<
" InitTransform();" << std::endl;
3140 fout <<
" }" << std::endl;
3142 fout <<
" // destructor" << std::endl;
3143 fout <<
" virtual ~" << className <<
"() {" << std::endl;
3144 fout <<
" Clear(); // method-specific" << std::endl;
3145 fout <<
" }" << std::endl;
3147 fout <<
" // the classifier response" << std::endl;
3148 fout <<
" // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
3149 fout <<
" // variables given to the constructor" << std::endl;
3151 fout <<
" std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const override;" << std::endl;
3153 fout <<
" double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
3156 fout <<
" private:" << std::endl;
3158 fout <<
" // method-specific destructor" << std::endl;
3159 fout <<
" void Clear();" << std::endl;
3161 if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3162 fout <<
" // input variable transformation" << std::endl;
3163 GetTransformationHandler().MakeFunction(fout, className,1);
3164 fout <<
" void InitTransform();" << std::endl;
3165 fout <<
" void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3168 fout <<
" // common member variables" << std::endl;
3169 fout <<
" const char* fClassName;" << std::endl;
3171 fout <<
" const size_t fNvars;" << std::endl;
3172 fout <<
" size_t GetNvar() const { return fNvars; }" << std::endl;
3173 fout <<
" char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3175 fout <<
" // normalisation of input variables" << std::endl;
3176 fout <<
" double fVmin[" << GetNvar() <<
"];" << std::endl;
3177 fout <<
" double fVmax[" << GetNvar() <<
"];" << std::endl;
3178 fout <<
" double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3179 fout <<
" // normalise to output range: [-1, 1]" << std::endl;
3180 fout <<
" return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3181 fout <<
" }" << std::endl;
3183 fout <<
" // type of input variable: 'F' or 'I'" << std::endl;
3184 fout <<
" char fType[" << GetNvar() <<
"];" << std::endl;
3186 fout <<
" // initialize internal variables" << std::endl;
3187 fout <<
" void Initialize();" << std::endl;
3189 fout <<
" std::vector<double> GetMulticlassValues__( const std::vector<double>& inputValues ) const;" << std::endl;
3191 fout <<
" double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3193 fout <<
"" << std::endl;
3194 fout <<
" // private members (method specific)" << std::endl;
3197 MakeClassSpecific( fout, className );
3200 fout <<
"inline std::vector<double> " << className <<
"::GetMulticlassValues( const std::vector<double>& inputValues ) const" << std::endl;
3202 fout <<
"inline double " << className <<
"::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3204 fout <<
"{" << std::endl;
3205 fout <<
" // classifier response value" << std::endl;
3207 fout <<
" std::vector<double> retval;" << std::endl;
3209 fout <<
" double retval = 0;" << std::endl;
3212 fout <<
" // classifier response, sanity check first" << std::endl;
3213 fout <<
" if (!IsStatusClean()) {" << std::endl;
3214 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3215 fout <<
" << \" because status is dirty\" << std::endl;" << std::endl;
3216 fout <<
" }" << std::endl;
3217 fout <<
" else {" << std::endl;
3218 if (IsNormalised()) {
3219 fout <<
" // normalise variables" << std::endl;
3220 fout <<
" std::vector<double> iV;" << std::endl;
3221 fout <<
" iV.reserve(inputValues.size());" << std::endl;
3222 fout <<
" int ivar = 0;" << std::endl;
3223 fout <<
" for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3224 fout <<
" varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3225 fout <<
" iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3226 fout <<
" }" << std::endl;
3227 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() !=
Types::kLikelihood &&
3229 fout <<
" Transform( iV, -1 );" << std::endl;
3233 fout <<
" retval = GetMulticlassValues__( iV );" << std::endl;
3235 fout <<
" retval = GetMvaValue__( iV );" << std::endl;
3238 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() !=
Types::kLikelihood &&
3240 fout <<
" std::vector<double> iV(inputValues);" << std::endl;
3241 fout <<
" Transform( iV, -1 );" << std::endl;
3243 fout <<
" retval = GetMulticlassValues__( iV );" << std::endl;
3245 fout <<
" retval = GetMvaValue__( iV );" << std::endl;
3249 fout <<
" retval = GetMulticlassValues__( inputValues );" << std::endl;
3251 fout <<
" retval = GetMvaValue__( inputValues );" << std::endl;
3255 fout <<
" }" << std::endl;
3257 fout <<
" return retval;" << std::endl;
3258 fout <<
"}" << std::endl;
3261 if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3262 GetTransformationHandler().MakeFunction(fout, className,2);
3274 std::streambuf* cout_sbuf = std::cout.rdbuf();
3275 std::ofstream* o = 0;
3276 if (
gConfig().WriteOptionsReference()) {
3277 Log() << kINFO <<
"Print Help message for class " << GetName() <<
" into file: " << GetReferenceFile() <<
Endl;
3278 o =
new std::ofstream( GetReferenceFile(), std::ios::app );
3280 Log() << kFATAL <<
"<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() <<
Endl;
3282 std::cout.rdbuf( o->rdbuf() );
3287 Log() << kINFO <<
Endl;
3289 <<
"================================================================"
3293 <<
"H e l p f o r M V A m e t h o d [ " << GetName() <<
" ] :"
3298 Log() <<
"Help for MVA method [ " << GetName() <<
" ] :" <<
Endl;
3306 Log() <<
"<Suppress this message by specifying \"!H\" in the booking option>" <<
Endl;
3308 <<
"================================================================"
3315 Log() <<
"# End of Message___" <<
Endl;
3318 std::cout.rdbuf( cout_sbuf );
3333 retval = fSplRefS->Eval( theCut );
3335 else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3344 if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3345 else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3358 if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3359 return (Data()->GetEventCollection(
type));
3366 if (fEventCollections.at(idx) == 0) {
3367 fEventCollections.at(idx) = &(Data()->GetEventCollection(
type));
3368 fEventCollections.at(idx) = GetTransformationHandler().CalcTransformations(*(fEventCollections.at(idx)),
kTRUE);
3370 return *(fEventCollections.at(idx));
3378 UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000;
a>>=16;
3379 UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00;
b>>=8;
3380 UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3390 UInt_t a = GetTrainingROOTVersionCode() & 0xff0000;
a>>=16;
3391 UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00;
b>>=8;
3392 UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3403 if (mvaRes != NULL) {
3406 TH1D *mva_s_tr =
dynamic_cast<TH1D*
> (mvaRes->
GetHist(
"MVA_TRAIN_S"));
3407 TH1D *mva_b_tr =
dynamic_cast<TH1D*
> (mvaRes->
GetHist(
"MVA_TRAIN_B"));
3409 if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr)
return -1;
3411 if (SorB ==
's' || SorB ==
'S')
const Bool_t Use_Splines_for_Eff_
const Int_t NBIN_HIST_HIGH
#define ROOT_VERSION_CODE
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
#define TMVA_VERSION_CODE
Class to manage histogram axis.
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write all objects in this collection.
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130...
const char * AsString() const
Return the date & time as a string (ctime() format).
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.
Describe directory structure in memory.
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using a path.
virtual Bool_t cd()
Change current directory to "this" directory.
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
void Close(Option_t *option="") override
Close a file.
A TGraph is an object made of two arrays X and Y with npoints each.
1-D histogram with a double per channel (see TH1 documentation)
1-D histogram with a float per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum=0)
Compute Quantiles for this histogram Quantile x_q of a probability distribution Function F is defined...
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
virtual void SetXTitle(const char *title)
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
virtual Double_t GetMaximum(Double_t maxval=FLT_MAX) const
Return maximum value smaller than maxval of bins in the range, unless the value has been overridden b...
virtual Int_t GetNbinsX() const
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
virtual Int_t GetMaximumBin() const
Return location of bin with maximum value in the range.
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
virtual void SetYTitle(const char *title)
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
virtual Double_t KolmogorovTest(const TH1 *h2, Option_t *option="") const
Statistical test of compatibility in shape between this histogram and h2, using Kolmogorov test.
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
2-D histogram with a float per channel (see TH1 documentation)
Int_t Fill(Double_t)
Invalid Fill method.
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Class that contains all the information of a class.
TString fWeightFileExtension
Int_t fMaxNumOfAllowedVariables
VariablePlotting & GetVariablePlotting()
class TMVA::Config::VariablePlotting fVariablePlotting
Class that contains all the data information.
Class that contains all the data information.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Float_t GetTarget(UInt_t itgt) const
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Interface for all concrete MVA method implementations.
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
IPythonInteractive()
standard constructor
~IPythonInteractive()
standard destructor
void ClearGraphs()
This function sets the point number to 0 for all graphs.
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Virtual base Class for all MVA method.
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
void SetWeightFileDir(TString fileDir)
set directory of weight file
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables,...
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual Double_t GetSignificance() const
compute significance of mean difference
virtual Double_t GetProba(const Event *ev)
const char * GetName() const
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
void PrintHelpMessage() const
prints out method-specific help method
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual std::vector< Double_t > GetDataMvaValues(DataSet *data=nullptr, Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the given Data type
void SetupMethod()
setup of methods
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
virtual ~MethodBase()
destructor
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot the significance curve for given number of signal and background events; returns cut for maximum ...
virtual Double_t GetTrainingEfficiency(const TString &)
void SetWeightFileName(TString)
set the weight file name (deprecated)
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
TString GetWeightFileName() const
retrieve weight file name
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file; dummy implementation here
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void InitBase()
default initialization called by all constructors
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
void ReadStateFromFile()
Function to read options and weights from file.
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream.
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
void SetTestvarName(const TString &v="")
void ReadVariablesFromXML(void *varnode)
read variable info from XML
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables,...
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity:
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument. This is just a dummy.
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
void AddVarsXMLTo(void *parent) const
write variable info to XML
void AddTargetsXMLTo(void *parent) const
write target info to XML
void ReadTargetsFromXML(void *tarnode)
read target info from XML
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
void ReadStateFromXML(void *parent)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
void SetSource(const std::string &source)
PDF wrapper for histograms; uses user-defined spline interpolation.
Double_t GetVal(Double_t x) const
returns value PDF(x)
Double_t GetIntegral(Double_t xmin, Double_t xmax)
computes PDF integral within given ranges
Class that is the base-class for a vector of result.
void Resize(Int_t entries)
std::vector< Float_t > * GetValueVector()
void SetValue(Float_t value, Int_t ievt, Bool_t type)
set MVA response
Class which takes the results of a multiclass classification.
TMatrixD GetConfusionMatrix(Double_t effB)
Returns a confusion matrix where each class is pitted against each other.
Float_t GetAchievablePur(UInt_t cls)
std::vector< Double_t > GetBestMultiClassCuts(UInt_t targetClass)
calculate the best working point (optimal cut values) for the multiclass classifier
void CreateMulticlassHistos(TString prefix, Int_t nbins, Int_t nbins_high)
this function fills the mva response histos for multiclass classification
Float_t GetAchievableEff(UInt_t cls)
void CreateMulticlassPerformanceHistos(TString prefix)
Create performance graphs for this classifier in a multiclass setting.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Bool_t DoesExist(const TString &alias) const
Returns true if there is an object stored in the result for a given alias, false otherwise.
TH1 * GetHist(const TString &alias) const
TList * GetStorage() const
void Store(TObject *obj, const char *alias=0)
Root finding using Brent's algorithm (translated from CERNLIB function RZERO)
Double_t Root(Double_t refValue)
Root finding using Brent's algorithm; taken from CERNLIB function RZERO.
Linear interpolation of TGraph.
Timing information for training and evaluation of MVA methods.
Double_t ElapsedSeconds(void)
computes elapsed time in seconds
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Singleton class for Global types used by TMVA.
Class for type info of MVA input variable.
void ReadFromXML(void *varnode)
read VariableInfo from XML
const TString & GetExpression() const
void ReadFromStream(std::istream &istr)
read VariableInfo from stream
void AddToXML(void *varnode)
write class to XML
void SetExternalLink(void *p)
void * GetExternalLink() const
A TMultiGraph is a collection of TGraph (or derived) objects.
virtual const char * GetName() const
Returns name of object.
Collectable string class.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
void ToLower()
Change string to lower-case.
Int_t Atoi() const
Return integer value of string.
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
Ssiz_t Last(char c) const
Find last occurrence of a character c.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
virtual const char * GetBuildNode() const
Return the build node name.
virtual int mkdir(const char *name, Bool_t recursive=kFALSE)
Make a file system directory.
virtual const char * WorkingDirectory()
Return working directory.
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file; if layout<=0, no spaces or newlines will be placed between xmlnode...
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Double_t Sqrt(Double_t x)
Short_t Min(Short_t a, Short_t b)