137#pragma warning ( disable : 4355 )
158 fMultiGraph =
nullptr;
172 std::cerr << kERROR <<
"IPythonInteractive::Init: already initialized..." << std::endl;
176 for(
auto& title : graphTitles){
177 fGraphs.push_back(
new TGraph() );
178 fGraphs.back()->SetTitle(title);
179 fGraphs.back()->SetName(title);
180 fGraphs.back()->SetFillColor(color);
181 fGraphs.back()->SetLineColor(color);
182 fGraphs.back()->SetMarkerColor(color);
183 fMultiGraph->Add(fGraphs.back());
195 for(
Int_t i=0; i<fNumGraphs; i++){
209 fGraphs[0]->Set(fIndex+1);
210 fGraphs[1]->Set(fIndex+1);
211 fGraphs[0]->SetPoint(fIndex,
x, y1);
212 fGraphs[1]->SetPoint(fIndex,
x, y2);
225 for(
Int_t i=0; i<fNumGraphs;i++){
226 fGraphs[i]->Set(fIndex+1);
227 fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
247 fAnalysisType (
Types::kNoAnalysisType ),
248 fRegressionReturnVal ( 0 ),
249 fMulticlassReturnVal ( 0 ),
250 fDataSetInfo ( dsi ),
251 fSignalReferenceCut ( 0.5 ),
252 fSignalReferenceCutOrientation( 1. ),
253 fVariableTransformType (
Types::kSignal ),
254 fJobName ( jobName ),
255 fMethodName ( methodTitle ),
256 fMethodType ( methodType ),
260 fConstructedFromWeightFile (
kFALSE ),
262 fMethodBaseDir ( 0 ),
265 fModelPersistence (
kTRUE),
276 fSplTrainEffBvsS ( 0 ),
277 fVarTransformString (
"None" ),
278 fTransformationPointer ( 0 ),
279 fTransformation ( dsi, methodTitle ),
281 fVerbosityLevelString (
"Default" ),
284 fIgnoreNegWeightsInTraining(
kFALSE ),
286 fBackgroundClass ( 0 ),
311 fAnalysisType (
Types::kNoAnalysisType ),
312 fRegressionReturnVal ( 0 ),
313 fMulticlassReturnVal ( 0 ),
314 fDataSetInfo ( dsi ),
315 fSignalReferenceCut ( 0.5 ),
316 fVariableTransformType (
Types::kSignal ),
318 fMethodName (
"MethodBase" ),
319 fMethodType ( methodType ),
321 fTMVATrainingVersion ( 0 ),
322 fROOTTrainingVersion ( 0 ),
323 fConstructedFromWeightFile (
kTRUE ),
325 fMethodBaseDir ( 0 ),
328 fModelPersistence (
kTRUE),
329 fWeightFile ( weightFile ),
339 fSplTrainEffBvsS ( 0 ),
340 fVarTransformString (
"None" ),
341 fTransformationPointer ( 0 ),
342 fTransformation ( dsi,
"" ),
344 fVerbosityLevelString (
"Default" ),
347 fIgnoreNegWeightsInTraining(
kFALSE ),
349 fBackgroundClass ( 0 ),
367 if (!fSetupCompleted) Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Calling destructor of method which got never setup" <<
Endl;
370 if (fInputVars != 0) { fInputVars->clear();
delete fInputVars; }
371 if (fRanking != 0)
delete fRanking;
374 if (fDefaultPDF!= 0) {
delete fDefaultPDF; fDefaultPDF = 0; }
375 if (fMVAPdfS != 0) {
delete fMVAPdfS; fMVAPdfS = 0; }
376 if (fMVAPdfB != 0) {
delete fMVAPdfB; fMVAPdfB = 0; }
379 if (fSplS) {
delete fSplS; fSplS = 0; }
380 if (fSplB) {
delete fSplB; fSplB = 0; }
381 if (fSpleffBvsS) {
delete fSpleffBvsS; fSpleffBvsS = 0; }
382 if (fSplRefS) {
delete fSplRefS; fSplRefS = 0; }
383 if (fSplRefB) {
delete fSplRefB; fSplRefB = 0; }
384 if (fSplTrainRefS) {
delete fSplTrainRefS; fSplTrainRefS = 0; }
385 if (fSplTrainRefB) {
delete fSplTrainRefB; fSplTrainRefB = 0; }
386 if (fSplTrainEffBvsS) {
delete fSplTrainEffBvsS; fSplTrainEffBvsS = 0; }
388 for (
Int_t i = 0; i < 2; i++ ) {
389 if (fEventCollections.at(i)) {
390 for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
391 it != fEventCollections.at(i)->end(); ++it) {
394 delete fEventCollections.at(i);
395 fEventCollections.at(i) = 0;
399 if (fRegressionReturnVal)
delete fRegressionReturnVal;
400 if (fMulticlassReturnVal)
delete fMulticlassReturnVal;
410 if (fSetupCompleted) Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Calling SetupMethod for the second time" <<
Endl;
412 DeclareBaseOptions();
415 fSetupCompleted =
kTRUE;
425 ProcessBaseOptions();
435 CheckForUnusedOptions();
443 SetConfigDescription(
"Configuration options for classifier architecture and tuning" );
451 fSplTrainEffBvsS = 0;
458 fTxtWeightsOnly =
kTRUE;
468 fInputVars =
new std::vector<TString>;
469 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
470 fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
472 fRegressionReturnVal = 0;
473 fMulticlassReturnVal = 0;
475 fEventCollections.resize( 2 );
476 fEventCollections.at(0) = 0;
477 fEventCollections.at(1) = 0;
480 if (DataInfo().GetClassInfo(
"Signal") != 0) {
481 fSignalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
483 if (DataInfo().GetClassInfo(
"Background") != 0) {
484 fBackgroundClass = DataInfo().GetClassInfo(
"Background")->GetNumber();
487 SetConfigDescription(
"Configuration options for MVA method" );
488 SetConfigName(
TString(
"Method") + GetMethodTypeName() );
511 DeclareOptionRef( fVerbose,
"V",
"Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
513 DeclareOptionRef( fVerbosityLevelString=
"Default",
"VerbosityLevel",
"Verbosity level" );
514 AddPreDefVal(
TString(
"Default") );
515 AddPreDefVal(
TString(
"Debug") );
516 AddPreDefVal(
TString(
"Verbose") );
517 AddPreDefVal(
TString(
"Info") );
518 AddPreDefVal(
TString(
"Warning") );
519 AddPreDefVal(
TString(
"Error") );
520 AddPreDefVal(
TString(
"Fatal") );
524 fTxtWeightsOnly =
kTRUE;
527 DeclareOptionRef( fVarTransformString,
"VarTransform",
"List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
529 DeclareOptionRef( fHelp,
"H",
"Print method-specific help message" );
531 DeclareOptionRef( fHasMVAPdfs,
"CreateMVAPdfs",
"Create PDFs for classifier outputs (signal and background)" );
533 DeclareOptionRef( fIgnoreNegWeightsInTraining,
"IgnoreNegWeightsInTraining",
534 "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
546 fDefaultPDF =
new PDF(
TString(GetName())+
"_PDF", GetOptions(),
"MVAPdf" );
547 fDefaultPDF->DeclareOptions();
548 fDefaultPDF->ParseOptions();
549 fDefaultPDF->ProcessOptions();
550 fMVAPdfB =
new PDF(
TString(GetName())+
"_PDFBkg", fDefaultPDF->GetOptions(),
"MVAPdfBkg", fDefaultPDF );
551 fMVAPdfB->DeclareOptions();
552 fMVAPdfB->ParseOptions();
553 fMVAPdfB->ProcessOptions();
554 fMVAPdfS =
new PDF(
TString(GetName())+
"_PDFSig", fMVAPdfB->GetOptions(),
"MVAPdfSig", fDefaultPDF );
555 fMVAPdfS->DeclareOptions();
556 fMVAPdfS->ParseOptions();
557 fMVAPdfS->ProcessOptions();
560 SetOptions( fMVAPdfS->GetOptions() );
565 GetTransformationHandler(),
569 if (fDefaultPDF!= 0) {
delete fDefaultPDF; fDefaultPDF = 0; }
570 if (fMVAPdfS != 0) {
delete fMVAPdfS; fMVAPdfS = 0; }
571 if (fMVAPdfB != 0) {
delete fMVAPdfB; fMVAPdfB = 0; }
575 fVerbosityLevelString =
TString(
"Verbose");
576 Log().SetMinType( kVERBOSE );
578 else if (fVerbosityLevelString ==
"Debug" ) Log().SetMinType( kDEBUG );
579 else if (fVerbosityLevelString ==
"Verbose" ) Log().SetMinType( kVERBOSE );
580 else if (fVerbosityLevelString ==
"Info" ) Log().SetMinType( kINFO );
581 else if (fVerbosityLevelString ==
"Warning" ) Log().SetMinType( kWARNING );
582 else if (fVerbosityLevelString ==
"Error" ) Log().SetMinType( kERROR );
583 else if (fVerbosityLevelString ==
"Fatal" ) Log().SetMinType( kFATAL );
584 else if (fVerbosityLevelString !=
"Default" ) {
585 Log() << kFATAL <<
"<ProcessOptions> Verbosity level type '"
586 << fVerbosityLevelString <<
"' unknown." <<
Endl;
598 DeclareOptionRef( fNormalise=
kFALSE,
"Normalise",
"Normalise input variables" );
599 DeclareOptionRef( fUseDecorr=
kFALSE,
"D",
"Use-decorrelated-variables flag" );
600 DeclareOptionRef( fVariableTransformTypeString=
"Signal",
"VarTransformType",
601 "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
602 AddPreDefVal(
TString(
"Signal") );
603 AddPreDefVal(
TString(
"Background") );
604 DeclareOptionRef( fTxtWeightsOnly=
kTRUE,
"TxtWeightFilesOnly",
"If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
614 DeclareOptionRef( fNbinsMVAPdf = 60,
"NbinsMVAPdf",
"Number of bins used for the PDFs of classifier outputs" );
615 DeclareOptionRef( fNsmoothMVAPdf = 2,
"NsmoothMVAPdf",
"Number of smoothing iterations for classifier PDFs" );
629 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Parameter optimization is not yet implemented for method "
630 << GetName() <<
Endl;
631 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Currently we need to set hardcoded which parameter is tuned in which ranges"<<
Endl;
633 std::map<TString,Double_t> tunedParameters;
634 tunedParameters.size();
635 return tunedParameters;
656 if (Help()) PrintHelpMessage();
659 if(!IsSilentFile()) BaseDir()->cd();
663 GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());
667 <<
"Begin training" <<
Endl;
668 Long64_t nEvents = Data()->GetNEvents();
669 Timer traintimer( nEvents, GetName(),
kTRUE );
672 <<
"\tEnd of training " <<
Endl;
675 <<
"Elapsed time for training with " << nEvents <<
" events: "
679 <<
"\tCreate MVA output for ";
682 if (DoMulticlass()) {
683 Log() <<
Form(
"[%s] : ",DataInfo().GetName())<<
"Multiclass classification on training sample" <<
Endl;
686 else if (!DoRegression()) {
688 Log() <<
Form(
"[%s] : ",DataInfo().GetName())<<
"classification on training sample" <<
Endl;
697 Log() <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"regression on training sample" <<
Endl;
701 Log() <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Create PDFs" <<
Endl;
708 if (fModelPersistence ) WriteStateToFile();
711 if ((!DoRegression()) && (fModelPersistence)) MakeClass();
718 WriteMonitoringHistosToFile();
726 if (!DoRegression()) Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Trying to use GetRegressionDeviation() with a classification job" <<
Endl;
727 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Create results for " << (
type==
Types::kTraining?
"training":
"testing") <<
Endl;
729 bool truncate =
false;
730 TH1F*
h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
735 TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
746 Data()->SetCurrentType(
type);
748 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Create results for " << (
type==
Types::kTraining?
"training":
"testing") <<
Endl;
752 Long64_t nEvents = Data()->GetNEvents();
756 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"Evaluation of " << GetMethodName() <<
" on "
759 regRes->Resize( nEvents );
764 Int_t totalProgressDraws = 100;
765 Int_t drawProgressEvery = 1;
766 if(nEvents >= totalProgressDraws) drawProgressEvery = nEvents/totalProgressDraws;
768 for (
Int_t ievt=0; ievt<nEvents; ievt++) {
770 Data()->SetCurrentEvent(ievt);
771 std::vector< Float_t > vals = GetRegressionValues();
772 regRes->SetValue( vals, ievt );
775 if(ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.
DrawProgressBar( ievt );
778 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())
779 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
786 TString histNamePrefix(GetTestvarName());
788 regRes->CreateDeviationHistograms( histNamePrefix );
796 Data()->SetCurrentType(
type);
798 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Create results for " << (
type==
Types::kTraining?
"training":
"testing") <<
Endl;
801 if (!resMulticlass) Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"unable to create pointer in AddMulticlassOutput, exiting."<<
Endl;
803 Long64_t nEvents = Data()->GetNEvents();
808 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Multiclass evaluation of " << GetMethodName() <<
" on "
811 resMulticlass->Resize( nEvents );
812 for (
Int_t ievt=0; ievt<nEvents; ievt++) {
813 Data()->SetCurrentEvent(ievt);
814 std::vector< Float_t > vals = GetMulticlassValues();
815 resMulticlass->SetValue( vals, ievt );
819 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())
820 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
827 TString histNamePrefix(GetTestvarName());
830 resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
831 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
838 if (errUpper) *errUpper=-1;
845 Double_t val = GetMvaValue(err, errUpper);
855 return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ?
kTRUE :
kFALSE;
862 return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ?
kTRUE :
kFALSE;
870 Data()->SetCurrentType(
type);
875 Long64_t nEvents = Data()->GetNEvents();
880 std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents,
true);
887 for (
Int_t ievt = 0; ievt < nEvents; ievt++) {
890 auto ev = Data()->GetEvent(ievt);
891 clRes->
SetValue(mvaValues[ievt], ievt, DataInfo().IsSignal(ev));
900 Long64_t nEvents = Data()->GetNEvents();
901 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
902 if (firstEvt < 0) firstEvt = 0;
903 std::vector<Double_t> values(lastEvt-firstEvt);
905 nEvents = values.size();
911 Log() << kHEADER <<
Form(
"[%s] : ",DataInfo().GetName())
912 <<
"Evaluation of " << GetMethodName() <<
" on "
914 <<
" sample (" << nEvents <<
" events)" <<
Endl;
916 for (
Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
917 Data()->SetCurrentEvent(ievt);
918 values[ievt] = GetMvaValue();
923 if (modulo <= 0 ) modulo = 1;
929 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
941 Data()->SetCurrentType(
type);
946 Long64_t nEvents = Data()->GetNEvents();
951 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"Evaluation of " << GetMethodName() <<
" on "
954 mvaProb->
Resize( nEvents );
955 for (
Int_t ievt=0; ievt<nEvents; ievt++) {
957 Data()->SetCurrentEvent(ievt);
959 if (proba < 0)
break;
960 mvaProb->
SetValue( proba, ievt, DataInfo().IsSignal( Data()->GetEvent()) );
964 if (modulo <= 0 ) modulo = 1;
968 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",DataInfo().GetName())
969 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
988 Data()->SetCurrentType(
type);
990 bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
992 Double_t m1 = 0, m2 = 0,
s1 = 0, s2 = 0, s12 = 0;
993 const Int_t nevt = GetNEvents();
998 Log() << kINFO <<
"Calculate regression for all events" <<
Endl;
1000 for (
Long64_t ievt=0; ievt<nevt; ievt++) {
1002 const Event* ev = Data()->GetEvent(ievt);
1005 Float_t r = GetRegressionValues()[0];
1024 m1 += t*w;
s1 += t*t*w;
1025 m2 +=
r*w; s2 +=
r*
r*w;
1030 Log() << kINFO <<
"Elapsed time for evaluation of " << nevt <<
" events: "
1042 corr = s12/sumw - m1*m2;
1043 corr /=
TMath::Sqrt( (
s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1054 for (
Long64_t ievt=0; ievt<nevt; ievt++) {
1056 hist->
Fill( rV[ievt], tV[ievt], wV[ievt] );
1057 if (
d >= devMin &&
d <= devMax) {
1059 biasT += wV[ievt] *
d;
1061 rmsT += wV[ievt] *
d *
d;
1062 histT->
Fill( rV[ievt], tV[ievt], wV[ievt] );
1080 Data()->SetCurrentType(savedType);
1090 if (!resMulticlass) Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"unable to create pointer in TestMulticlass, exiting."<<
Endl;
1099 TString histNamePrefix(GetTestvarName());
1100 TString histNamePrefixTest{histNamePrefix +
"_Test"};
1101 TString histNamePrefixTrain{histNamePrefix +
"_Train"};
1122 if (0==mvaRes && !(GetMethodTypeName().Contains(
"Cuts"))) {
1123 Log()<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"mvaRes " << mvaRes <<
" GetMethodTypeName " << GetMethodTypeName()
1124 <<
" contains " << !(GetMethodTypeName().Contains(
"Cuts")) <<
Endl;
1125 Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"<TestInit> Test variable " << GetTestvarName()
1126 <<
" not found in tree" <<
Endl;
1131 fMeanS, fMeanB, fRmsS, fRmsB, fXmin, fXmax, fSignalClass );
1139 fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;
1151 TestvarName=
Form(
"[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1154 TestvarName=GetTestvarName();
1156 TH1* mva_s =
new TH1D( TestvarName +
"_S",TestvarName +
"_S", fNbinsMVAoutput, fXmin, sxmax );
1157 TH1* mva_b =
new TH1D( TestvarName +
"_B",TestvarName +
"_B", fNbinsMVAoutput, fXmin, sxmax );
1158 mvaRes->
Store(mva_s,
"MVA_S");
1159 mvaRes->
Store(mva_b,
"MVA_B");
1169 proba_s =
new TH1D( TestvarName +
"_Proba_S", TestvarName +
"_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1170 proba_b =
new TH1D( TestvarName +
"_Proba_B", TestvarName +
"_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1171 mvaRes->
Store(proba_s,
"Prob_S");
1172 mvaRes->
Store(proba_b,
"Prob_B");
1177 rarity_s =
new TH1D( TestvarName +
"_Rarity_S", TestvarName +
"_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1178 rarity_b =
new TH1D( TestvarName +
"_Rarity_B", TestvarName +
"_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1179 mvaRes->
Store(rarity_s,
"Rar_S");
1180 mvaRes->
Store(rarity_b,
"Rar_B");
1186 TH1* mva_eff_s =
new TH1D( TestvarName +
"_S_high", TestvarName +
"_S_high", fNbinsH, fXmin, sxmax );
1187 TH1* mva_eff_b =
new TH1D( TestvarName +
"_B_high", TestvarName +
"_B_high", fNbinsH, fXmin, sxmax );
1188 mvaRes->
Store(mva_eff_s,
"MVA_HIGHBIN_S");
1189 mvaRes->
Store(mva_eff_b,
"MVA_HIGHBIN_B");
1198 Log() << kHEADER <<
Form(
"[%s] : ",DataInfo().GetName())<<
"Loop over test events and fill histograms with classifier response..." <<
Endl <<
Endl;
1199 if (mvaProb) Log() << kINFO <<
"Also filling probability and rarity histograms (on request)..." <<
Endl;
1203 if ( mvaRes->
GetSize() != GetNEvents() ) {
1204 Log() << kFATAL <<
TString::Format(
"Inconsistent result size %lld with number of events %u ", mvaRes->
GetSize() , GetNEvents() ) <<
Endl;
1205 assert(mvaRes->
GetSize() == GetNEvents());
1208 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1210 const Event* ev = GetEvent(ievt);
1214 if (DataInfo().IsSignal(ev)) {
1216 mva_s ->
Fill(
v, w );
1218 proba_s->
Fill( (*mvaProb)[ievt][0], w );
1219 rarity_s->
Fill( GetRarity(
v ), w );
1222 mva_eff_s ->
Fill(
v, w );
1226 mva_b ->
Fill(
v, w );
1228 proba_b->
Fill( (*mvaProb)[ievt][0], w );
1229 rarity_b->
Fill( GetRarity(
v ), w );
1231 mva_eff_b ->
Fill(
v, w );
1246 if (fSplS) {
delete fSplS; fSplS = 0; }
1247 if (fSplB) {
delete fSplB; fSplB = 0; }
1261 tf << prefix <<
"#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1262 tf << prefix <<
"Method : " << GetMethodTypeName() <<
"::" << GetMethodName() << std::endl;
1263 tf.setf(std::ios::left);
1264 tf << prefix <<
"TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() <<
" ["
1265 << GetTrainingTMVAVersionCode() <<
"]" << std::endl;
1266 tf << prefix <<
"ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() <<
" ["
1267 << GetTrainingROOTVersionCode() <<
"]" << std::endl;
1268 tf << prefix <<
"Creator : " << userInfo->
fUser << std::endl;
1272 tf << prefix <<
"Training events: " << Data()->GetNTrainingEvents() << std::endl;
1276 tf << prefix <<
"Analysis type : " <<
"[" << ((GetAnalysisType()==
Types::kRegression) ?
"Regression" :
"Classification") <<
"]" << std::endl;
1277 tf << prefix << std::endl;
1282 tf << prefix << std::endl << prefix <<
"#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1283 WriteOptionsToStream( tf, prefix );
1284 tf << prefix << std::endl;
1287 tf << prefix << std::endl << prefix <<
"#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1288 WriteVarsToStream( tf, prefix );
1289 tf << prefix << std::endl;
1306 AddRegressionOutput(
type );
1308 AddMulticlassOutput(
type );
1310 AddClassifierOutput(
type );
1312 AddClassifierOutputProb(
type );
1322 if (!parent)
return;
1327 AddInfoItem( gi,
"TMVA Release", GetTrainingTMVAVersionString() +
" [" +
gTools().StringFromInt(GetTrainingTMVAVersionCode()) +
"]" );
1328 AddInfoItem( gi,
"ROOT Release", GetTrainingROOTVersionString() +
" [" +
gTools().StringFromInt(GetTrainingROOTVersionCode()) +
"]");
1329 AddInfoItem( gi,
"Creator", userInfo->
fUser);
1333 AddInfoItem( gi,
"Training events",
gTools().StringFromInt(Data()->GetNTrainingEvents()));
1334 AddInfoItem( gi,
"TrainingTime",
gTools().StringFromDouble(
const_cast<TMVA::MethodBase*
>(
this)->GetTrainTime()));
1339 AddInfoItem( gi,
"AnalysisType", analysisType );
1343 AddOptionsXMLTo( parent );
1346 AddVarsXMLTo( parent );
1349 if (fModelPersistence)
1350 AddSpectatorsXMLTo( parent );
1353 AddClassesXMLTo(parent);
1356 if (DoRegression()) AddTargetsXMLTo(parent);
1359 GetTransformationHandler(
false).AddXMLTo( parent );
1363 if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1364 if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1367 AddWeightsXMLTo( parent );
1383 ReadWeightsFromStream( rf );
1396 TString tfname( GetWeightFileName() );
1401 <<
"Creating xml weight file: "
1406 gTools().
AddAttr(rootnode,
"Method", GetMethodTypeName() +
"::" + GetMethodName());
1407 WriteStateToXML(rootnode);
1419 TString tfname(GetWeightFileName());
1422 <<
"Reading weight file: "
1428 Log() << kFATAL <<
"Error parsing XML file " << tfname <<
Endl;
1431 ReadStateFromXML(rootnode);
1436 fb.open(tfname.
Data(),std::ios::in);
1437 if (!fb.is_open()) {
1438 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<ReadStateFromFile> "
1439 <<
"Unable to open input weight file: " << tfname <<
Endl;
1441 std::istream fin(&fb);
1442 ReadStateFromStream(fin);
1445 if (!fTxtWeightsOnly) {
1448 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Reading root weight file: "
1451 ReadStateFromStream( *rfile );
1461 ReadStateFromXML(rootnode);
1475 fMethodName = fullMethodName(fullMethodName.
Index(
"::")+2,fullMethodName.
Length());
1478 Log().SetSource( GetName() );
1480 <<
"Read method \"" << GetMethodName() <<
"\" of type \"" << GetMethodTypeName() <<
"\"" <<
Endl;
1490 if (nodeName==
"GeneralInfo") {
1495 while (antypeNode) {
1498 if (
name ==
"TrainingTime")
1501 if (
name ==
"AnalysisType") {
1507 else Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Analysis type " << val <<
" is not known." <<
Endl;
1510 if (
name ==
"TMVA Release" ||
name ==
"TMVA") {
1514 Log() << kDEBUG <<
Form(
"[%s] : ",DataInfo().GetName()) <<
"MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() <<
Endl;
1517 if (
name ==
"ROOT Release" ||
name ==
"ROOT") {
1522 <<
"MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() <<
Endl;
1527 else if (nodeName==
"Options") {
1528 ReadOptionsFromXML(ch);
1532 else if (nodeName==
"Variables") {
1533 ReadVariablesFromXML(ch);
1535 else if (nodeName==
"Spectators") {
1536 ReadSpectatorsFromXML(ch);
1538 else if (nodeName==
"Classes") {
1539 if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1541 else if (nodeName==
"Targets") {
1542 if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1544 else if (nodeName==
"Transformations") {
1545 GetTransformationHandler().ReadFromXML(ch);
1547 else if (nodeName==
"MVAPdfs") {
1549 if (fMVAPdfS) {
delete fMVAPdfS; fMVAPdfS=0; }
1550 if (fMVAPdfB) {
delete fMVAPdfB; fMVAPdfB=0; }
1554 fMVAPdfS =
new PDF(pdfname);
1555 fMVAPdfS->ReadXML(pdfnode);
1558 fMVAPdfB =
new PDF(pdfname);
1559 fMVAPdfB->ReadXML(pdfnode);
1562 else if (nodeName==
"Weights") {
1563 ReadWeightsFromXML(ch);
1566 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Unparsed XML node: '" << nodeName <<
"'" <<
Endl;
1573 if (GetTransformationHandler().GetCallerName() ==
"") GetTransformationHandler().SetCallerName( GetName() );
1589 while (!
TString(buf).BeginsWith(
"Method")) GetLine(fin,buf);
1593 methodType = methodType(methodType.
Last(
' '),methodType.
Length());
1598 if (methodName ==
"") methodName = methodType;
1599 fMethodName = methodName;
1601 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Read method \"" << GetMethodName() <<
"\" of type \"" << GetMethodTypeName() <<
"\"" <<
Endl;
1604 Log().SetSource( GetName() );
1618 while (!
TString(buf).BeginsWith(
"#OPT")) GetLine(fin,buf);
1619 ReadOptionsFromStream(fin);
1623 fin.getline(buf,512);
1624 while (!
TString(buf).BeginsWith(
"#VAR")) fin.getline(buf,512);
1625 ReadVarsFromStream(fin);
1630 if (IsNormalised()) {
1636 if ( fVarTransformString ==
"None") {
1639 }
else if ( fVarTransformString ==
"Decorrelate" ) {
1641 }
else if ( fVarTransformString ==
"PCA" ) {
1642 varTrafo = GetTransformationHandler().AddTransformation(
new VariablePCATransform(DataInfo()), -1 );
1643 }
else if ( fVarTransformString ==
"Uniform" ) {
1644 varTrafo = GetTransformationHandler().AddTransformation(
new VariableGaussTransform(DataInfo(),
"Uniform"), -1 );
1645 }
else if ( fVarTransformString ==
"Gauss" ) {
1647 }
else if ( fVarTransformString ==
"GaussDecorr" ) {
1651 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<ProcessOptions> Variable transform '"
1652 << fVarTransformString <<
"' unknown." <<
Endl;
1655 if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1656 fin.getline(buf,512);
1657 while (!
TString(buf).BeginsWith(
"#MAT")) fin.getline(buf,512);
1660 varTrafo->ReadTransformationFromStream(fin, trafo );
1671 fin.getline(buf,512);
1672 while (!
TString(buf).BeginsWith(
"#MVAPDFS")) fin.getline(buf,512);
1673 if (fMVAPdfS != 0) {
delete fMVAPdfS; fMVAPdfS = 0; }
1674 if (fMVAPdfB != 0) {
delete fMVAPdfB; fMVAPdfB = 0; }
1675 fMVAPdfS =
new PDF(
TString(GetName()) +
" MVA PDF Sig");
1676 fMVAPdfB =
new PDF(
TString(GetName()) +
" MVA PDF Bkg");
1677 fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
1678 fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
1685 fin.getline(buf,512);
1686 while (!
TString(buf).BeginsWith(
"#WGT")) fin.getline(buf,512);
1687 fin.getline(buf,512);
1688 ReadWeightsFromStream( fin );;
1691 if (GetTransformationHandler().GetCallerName() ==
"") GetTransformationHandler().SetCallerName( GetName() );
1701 o << prefix <<
"NVar " << DataInfo().GetNVariables() << std::endl;
1702 std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1703 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1704 o << prefix <<
"NSpec " << DataInfo().GetNSpectators() << std::endl;
1705 varIt = DataInfo().GetSpectatorInfos().begin();
1706 for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1718 istr >> dummy >> readNVar;
1720 if (readNVar!=DataInfo().GetNVariables()) {
1721 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"You declared "<< DataInfo().GetNVariables() <<
" variables in the Reader"
1722 <<
" while there are " << readNVar <<
" variables declared in the file"
1728 std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1730 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
1737 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"ERROR in <ReadVarsFromStream>" <<
Endl;
1738 Log() << kINFO <<
"The definition (or the order) of the variables found in the input file is" <<
Endl;
1739 Log() << kINFO <<
"is not the same as the one declared in the Reader (which is necessary for" <<
Endl;
1740 Log() << kINFO <<
"the correct working of the method):" <<
Endl;
1741 Log() << kINFO <<
" var #" << varIdx <<
" declared in Reader: " << varIt->GetExpression() <<
Endl;
1742 Log() << kINFO <<
" var #" << varIdx <<
" declared in file : " << varInfo.
GetExpression() <<
Endl;
1743 Log() << kFATAL <<
"The expression declared to the Reader needs to be checked (name or order are wrong)" <<
Endl;
1756 for (
UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1772 for (
UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1774 VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1792 UInt_t nClasses=DataInfo().GetNClasses();
1797 for (
UInt_t iCls=0; iCls<nClasses; ++iCls) {
1798 ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1815 for (
UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1831 if (readNVar!=DataInfo().GetNVariables()) {
1832 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"You declared "<< DataInfo().GetNVariables() <<
" variables in the Reader"
1833 <<
" while there are " << readNVar <<
" variables declared in the file"
1843 existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1848 existingVarInfo = readVarInfo;
1851 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"ERROR in <ReadVariablesFromXML>" <<
Endl;
1852 Log() << kINFO <<
"The definition (or the order) of the variables found in the input file is" <<
Endl;
1853 Log() << kINFO <<
"not the same as the one declared in the Reader (which is necessary for the" <<
Endl;
1854 Log() << kINFO <<
"correct working of the method):" <<
Endl;
1855 Log() << kINFO <<
" var #" << varIdx <<
" declared in Reader: " << existingVarInfo.
GetExpression() <<
Endl;
1856 Log() << kINFO <<
" var #" << varIdx <<
" declared in file : " << readVarInfo.
GetExpression() <<
Endl;
1857 Log() << kFATAL <<
"The expression declared to the Reader needs to be checked (name or order are wrong)" <<
Endl;
1871 if (readNSpec!=DataInfo().GetNSpectators(
kFALSE)) {
1872 Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"You declared "<< DataInfo().GetNSpectators(
kFALSE) <<
" spectators in the Reader"
1873 <<
" while there are " << readNSpec <<
" spectators declared in the file"
1883 existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1888 existingSpecInfo = readSpecInfo;
1891 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"ERROR in <ReadSpectatorsFromXML>" <<
Endl;
1892 Log() << kINFO <<
"The definition (or the order) of the spectators found in the input file is" <<
Endl;
1893 Log() << kINFO <<
"not the same as the one declared in the Reader (which is necessary for the" <<
Endl;
1894 Log() << kINFO <<
"correct working of the method):" <<
Endl;
1895 Log() << kINFO <<
" spec #" << specIdx <<
" declared in Reader: " << existingSpecInfo.
GetExpression() <<
Endl;
1896 Log() << kINFO <<
" spec #" << specIdx <<
" declared in file : " << readSpecInfo.
GetExpression() <<
Endl;
1897 Log() << kFATAL <<
"The expression declared to the Reader needs to be checked (name or order are wrong)" <<
Endl;
1916 for (
UInt_t icls = 0; icls<readNCls;++icls) {
1918 DataInfo().AddClass(classname);
1926 DataInfo().AddClass(className);
1933 if (DataInfo().GetClassInfo(
"Signal") != 0) {
1934 fSignalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
1938 if (DataInfo().GetClassInfo(
"Background") != 0) {
1939 fBackgroundClass = DataInfo().GetClassInfo(
"Background")->GetNumber();
1959 DataInfo().AddTarget(expression,
"",
"",0,0);
1971 if (fBaseDir != 0)
return fBaseDir;
1972 Log()<<kDEBUG<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
" Base Directory for " << GetMethodName() <<
" not set yet --> check if already there.." <<
Endl;
1974 if (IsSilentFile()) {
1975 Log() << kFATAL <<
Form(
"Dataset[%s] : ", DataInfo().GetName())
1976 <<
"MethodBase::BaseDir() - No directory exists when running a Method without output file. Enable the "
1977 "output when creating the factory"
1983 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" <<
Endl;
1985 TString defaultDir = GetMethodName();
1989 Log()<<kDEBUG<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
" Base Directory for " << GetMethodTypeName() <<
" does not exist yet--> created it" <<
Endl;
1990 sdir = methodDir->
mkdir(defaultDir);
1993 if (fModelPersistence) {
1996 wfilePath.
Write(
"TrainingPath" );
1997 wfileName.
Write(
"WeightFileName" );
2001 Log()<<kDEBUG<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
" Base Directory for " << GetMethodTypeName() <<
" existed, return it.." <<
Endl;
2011 if (fMethodBaseDir != 0) {
2012 return fMethodBaseDir;
2015 const char *datasetName = DataInfo().
GetName();
2017 Log() << kDEBUG <<
Form(
"Dataset[%s] : ", datasetName) <<
" Base Directory for " << GetMethodTypeName()
2018 <<
" not set yet --> check if already there.." <<
Endl;
2021 if (!factoryBaseDir)
return nullptr;
2022 fMethodBaseDir = factoryBaseDir->
GetDirectory(datasetName);
2023 if (!fMethodBaseDir) {
2024 fMethodBaseDir = factoryBaseDir->
mkdir(datasetName,
Form(
"Base directory for dataset %s", datasetName));
2025 if (!fMethodBaseDir) {
2026 Log() << kFATAL <<
"Can not create dir " << datasetName;
2029 TString methodTypeDir =
Form(
"Method_%s", GetMethodTypeName().Data());
2030 fMethodBaseDir = fMethodBaseDir->GetDirectory(methodTypeDir.
Data());
2032 if (!fMethodBaseDir) {
2034 TString methodTypeDirHelpStr =
Form(
"Directory for all %s methods", GetMethodTypeName().Data());
2035 fMethodBaseDir = datasetDir->
mkdir(methodTypeDir.
Data(), methodTypeDirHelpStr);
2036 Log() << kDEBUG <<
Form(
"Dataset[%s] : ", datasetName) <<
" Base Directory for " << GetMethodName()
2037 <<
" does not exist yet--> created it" <<
Endl;
2040 Log() << kDEBUG <<
Form(
"Dataset[%s] : ", datasetName)
2041 <<
"Return from MethodBaseDir() after creating base directory " <<
Endl;
2042 return fMethodBaseDir;
2059 fWeightFile = theWeightFile;
2067 if (fWeightFile!=
"")
return fWeightFile;
2072 TString wFileDir(GetWeightFileDir());
2073 TString wFileName = GetJobName() +
"_" + GetMethodName() +
2075 if (wFileDir.
IsNull() )
return wFileName;
2077 return ( wFileDir + (wFileDir[wFileDir.
Length()-1]==
'/' ?
"" :
"/")
2089 if (0 != fMVAPdfS) {
2090 fMVAPdfS->GetOriginalHist()->Write();
2091 fMVAPdfS->GetSmoothedHist()->Write();
2092 fMVAPdfS->GetPDFHist()->Write();
2094 if (0 != fMVAPdfB) {
2095 fMVAPdfB->GetOriginalHist()->Write();
2096 fMVAPdfB->GetSmoothedHist()->Write();
2097 fMVAPdfB->GetPDFHist()->Write();
2103 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<WriteEvaluationHistosToFile> Unknown result: "
2104 << GetMethodName() << (treetype==
Types::kTraining?
"/kTraining":
"/kTesting")
2105 <<
"/kMaxAnalysisType" <<
Endl;
2110 GetTransformationHandler().PlotVariables (GetEventCollection(
Types::kTesting ), BaseDir() );
2112 Log() << kINFO <<
TString::Format(
"Dataset[%s] : ",DataInfo().GetName())
2113 <<
" variable plots are not produces ! The number of variables is " << DataInfo().GetNVariables()
2133 fin.getline(buf,512);
2135 if (
line.BeginsWith(
"TMVA Release")) {
2139 std::stringstream s(code.
Data());
2140 s >> fTMVATrainingVersion;
2141 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() <<
Endl;
2143 if (
line.BeginsWith(
"ROOT Release")) {
2147 std::stringstream s(code.
Data());
2148 s >> fROOTTrainingVersion;
2149 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() <<
Endl;
2151 if (
line.BeginsWith(
"Analysis type")) {
2155 std::stringstream s(code.
Data());
2156 std::string analysisType;
2158 if (analysisType ==
"regression" || analysisType ==
"Regression") SetAnalysisType(
Types::kRegression );
2159 else if (analysisType ==
"classification" || analysisType ==
"Classification") SetAnalysisType(
Types::kClassification );
2160 else if (analysisType ==
"multiclass" || analysisType ==
"Multiclass") SetAnalysisType(
Types::kMulticlass );
2161 else Log() << kFATAL <<
"Analysis type " << analysisType <<
" from weight-file not known!" << std::endl;
2163 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Method was trained for "
2183 if (mvaRes==0 || mvaRes->
GetSize()==0) {
2184 Log() << kERROR<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<CreateMVAPdfs> No result of classifier testing available" <<
Endl;
2191 TH1* histMVAPdfS =
new TH1D( GetMethodTypeName() +
"_tr_S", GetMethodTypeName() +
"_tr_S",
2192 fMVAPdfS->GetHistNBins( mvaRes->
GetSize() ), minVal, maxVal );
2193 TH1* histMVAPdfB =
new TH1D( GetMethodTypeName() +
"_tr_B", GetMethodTypeName() +
"_tr_B",
2194 fMVAPdfB->GetHistNBins( mvaRes->
GetSize() ), minVal, maxVal );
2198 histMVAPdfS->
Sumw2();
2199 histMVAPdfB->
Sumw2();
2204 Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2206 if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->
Fill( theVal, theWeight );
2207 else histMVAPdfB->
Fill( theVal, theWeight );
2216 histMVAPdfS->
Write();
2217 histMVAPdfB->
Write();
2220 fMVAPdfS->BuildPDF ( histMVAPdfS );
2221 fMVAPdfB->BuildPDF ( histMVAPdfB );
2222 fMVAPdfS->ValidatePDF( histMVAPdfS );
2223 fMVAPdfB->ValidatePDF( histMVAPdfB );
2225 if (DataInfo().GetNClasses() == 2) {
2226 Log() << kINFO<<
Form(
"Dataset[%s] : ",DataInfo().GetName())
2227 <<
Form(
"<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2228 GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2240 if (!fMVAPdfS || !fMVAPdfB) {
2241 Log() << kINFO<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" <<
Endl;
2244 Double_t sigFraction = DataInfo().GetTrainingSumSignalWeights() / (DataInfo().GetTrainingSumSignalWeights() + DataInfo().GetTrainingSumBackgrWeights() );
2247 return GetProba(mvaVal,sigFraction);
2255 if (!fMVAPdfS || !fMVAPdfB) {
2256 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetProba> MVA PDFs for Signal and Background don't exist" <<
Endl;
2259 Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2260 Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2262 Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2264 return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2277 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2278 <<
"select option \"CreateMVAPdfs\"" <<
Endl;
2293 Data()->SetCurrentType(
type);
2303 else if (list->
GetSize() > 2) {
2304 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetEfficiency> Wrong number of arguments"
2305 <<
" in string: " << theString
2306 <<
" | required format, e.g., Efficiency:0.05, or empty string" <<
Endl;
2314 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetEfficiency> Binning mismatch between signal and background histos" <<
Endl;
2322 TH1 * effhist = results->
GetHist(
"MVA_HIGHBIN_S");
2329 if (results->
DoesExist(
"MVA_EFF_S")==0) {
2332 TH1* eff_s =
new TH1D( GetTestvarName() +
"_effS", GetTestvarName() +
" (signal)", fNbinsH,
xmin,
xmax );
2333 TH1* eff_b =
new TH1D( GetTestvarName() +
"_effB", GetTestvarName() +
" (background)", fNbinsH,
xmin,
xmax );
2334 results->
Store(eff_s,
"MVA_EFF_S");
2335 results->
Store(eff_b,
"MVA_EFF_B");
2338 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2342 for (
UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2345 Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2346 Float_t theWeight = GetEvent(ievt)->GetWeight();
2347 Float_t theVal = (*mvaRes)[ievt];
2350 TH1* theHist = isSignal ? eff_s : eff_b;
2353 if (isSignal) nevtS+=theWeight;
2357 if (sign > 0 && maxbin > fNbinsH)
continue;
2358 if (sign < 0 && maxbin < 1 )
continue;
2359 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2360 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2365 for (
Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->
AddBinContent( ibin , theWeight );
2367 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetEfficiency> Mismatch in sign" <<
Endl;
2378 TH1* eff_BvsS =
new TH1D( GetTestvarName() +
"_effBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
2379 results->
Store(eff_BvsS,
"MVA_EFF_BvsS");
2384 TH1* rej_BvsS =
new TH1D( GetTestvarName() +
"_rejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
2385 results->
Store(rej_BvsS);
2387 rej_BvsS->
SetYTitle(
"Backgr rejection (1-eff)" );
2390 TH1* inveff_BvsS =
new TH1D( GetTestvarName() +
"_invBeffvsSeff",
2391 GetTestvarName(), fNbins, 0, 1 );
2392 results->
Store(inveff_BvsS);
2394 inveff_BvsS->
SetYTitle(
"Inverse backgr. eff (1/eff)" );
2400 fSplRefS =
new TSpline1(
"spline2_signal",
new TGraph( eff_s ) );
2401 fSplRefB =
new TSpline1(
"spline2_background",
new TGraph( eff_b ) );
2415 for (
Int_t bini=1; bini<=fNbins; bini++) {
2428 if (effB>std::numeric_limits<double>::epsilon())
2437 Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2438 Int_t nbins_ = 5000;
2439 for (
Int_t bini=1; bini<=nbins_; bini++) {
2442 effS = (bini - 0.5)/
Float_t(nbins_);
2443 rejB = 1.0 - fSpleffBvsS->Eval( effS );
2446 if ((effS - rejB)*(effS_ - rejB_) < 0)
break;
2453 SetSignalReferenceCut( cut );
2458 if (0 == fSpleffBvsS) {
2464 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2465 Int_t nbins_ = 1000;
2471 for (
Int_t bini=1; bini<=nbins_; bini++) {
2474 effS = (bini - 0.5)/
Float_t(nbins_);
2475 effB = fSpleffBvsS->Eval( effS );
2476 integral += (1.0 - effB);
2490 for (
Int_t bini=1; bini<=nbins_; bini++) {
2493 effS = (bini - 0.5)/
Float_t(nbins_);
2494 effB = fSpleffBvsS->Eval( effS );
2497 if ((effB - effBref)*(effB_ - effBref) <= 0)
break;
2503 effS = 0.5*(effS + effS_);
2506 if (nevtS > 0) effSerr =
TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2531 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetTrainingEfficiency> Wrong number of arguments"
2532 <<
" in string: " << theString
2533 <<
" | required format, e.g., Efficiency:0.05" <<
Endl;
2546 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2554 TH1 * effhist = results->
GetHist(
"MVA_HIGHBIN_S");
2559 if (results->
DoesExist(
"MVA_TRAIN_S")==0) {
2565 TH1* mva_s_tr =
new TH1D( GetTestvarName() +
"_Train_S",GetTestvarName() +
"_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2566 TH1* mva_b_tr =
new TH1D( GetTestvarName() +
"_Train_B",GetTestvarName() +
"_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2567 results->
Store(mva_s_tr,
"MVA_TRAIN_S");
2568 results->
Store(mva_b_tr,
"MVA_TRAIN_B");
2573 TH1* mva_eff_tr_s =
new TH1D( GetTestvarName() +
"_trainingEffS", GetTestvarName() +
" (signal)",
2575 TH1* mva_eff_tr_b =
new TH1D( GetTestvarName() +
"_trainingEffB", GetTestvarName() +
" (background)",
2577 results->
Store(mva_eff_tr_s,
"MVA_TRAINEFF_S");
2578 results->
Store(mva_eff_tr_b,
"MVA_TRAINEFF_B");
2581 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2583 std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2584 assert( (
Long64_t) mvaValues.size() == Data()->GetNEvents());
2587 for (
Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2589 Data()->SetCurrentEvent(ievt);
2590 const Event* ev = GetEvent();
2595 TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2596 TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2598 theClsHist->
Fill( theVal, theWeight );
2602 if (sign > 0 && maxbin > fNbinsH)
continue;
2603 if (sign < 0 && maxbin < 1 )
continue;
2604 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2605 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2607 if (sign > 0)
for (
Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->
AddBinContent( ibin , theWeight );
2608 else for (
Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->
AddBinContent( ibin , theWeight );
2621 TH1* eff_bvss =
new TH1D( GetTestvarName() +
"_trainingEffBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
2623 TH1* rej_bvss =
new TH1D( GetTestvarName() +
"_trainingRejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
2624 results->
Store(eff_bvss,
"EFF_BVSS_TR");
2625 results->
Store(rej_bvss,
"REJ_BVSS_TR");
2631 if (fSplTrainRefS)
delete fSplTrainRefS;
2632 if (fSplTrainRefB)
delete fSplTrainRefB;
2633 fSplTrainRefS =
new TSpline1(
"spline2_signal",
new TGraph( mva_eff_tr_s ) );
2634 fSplTrainRefB =
new TSpline1(
"spline2_background",
new TGraph( mva_eff_tr_b ) );
2647 fEffS = results->
GetHist(
"MVA_TRAINEFF_S");
2648 for (
Int_t bini=1; bini<=fNbins; bini++) {
2666 fSplTrainEffBvsS =
new TSpline1(
"effBvsS",
new TGraph( eff_bvss ) );
2670 if (0 == fSplTrainEffBvsS)
return 0.0;
2673 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2674 Int_t nbins_ = 1000;
2675 for (
Int_t bini=1; bini<=nbins_; bini++) {
2678 effS = (bini - 0.5)/
Float_t(nbins_);
2679 effB = fSplTrainEffBvsS->Eval( effS );
2682 if ((effB - effBref)*(effB_ - effBref) <= 0)
break;
2687 return 0.5*(effS + effS_);
2696 if (!resMulticlass) Log() << kFATAL<<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"unable to create pointer in GetMulticlassEfficiency, exiting."<<
Endl;
2708 if (!resMulticlass) Log() << kFATAL<<
"unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<
Endl;
2710 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Determine optimal multiclass cuts for training data..." <<
Endl;
2711 for (
UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2742 Log() << kFATAL <<
"Cannot get confusion matrix for non-multiclass analysis." << std::endl;
2746 Data()->SetCurrentType(
type);
2750 if (resMulticlass ==
nullptr) {
2751 Log() << kFATAL <<
Form(
"Dataset[%s] : ", DataInfo().GetName())
2752 <<
"unable to create pointer in GetMulticlassEfficiency, exiting." <<
Endl;
2769 return (rms > 0) ?
TMath::Abs(fMeanS - fMeanB)/rms : 0;
2793 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2794 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetSeparation> Mismatch in pdfs" <<
Endl;
2795 if (!pdfS) pdfS = fSplS;
2796 if (!pdfB) pdfB = fSplB;
2798 if (!fSplS || !fSplB) {
2799 Log()<<kDEBUG<<
Form(
"[%s] : ",DataInfo().GetName())<<
"could not calculate the separation, distributions"
2800 <<
" fSplS or fSplB are not yet filled" <<
Endl;
2815 if ((!histS && histB) || (histS && !histB))
2816 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" <<
Endl;
2818 if (histS==0 || histB==0)
return 0.;
2831 for (
UInt_t i=0; i<nsteps; i++) {
2837 return integral*step;
2849 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2850 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetSeparation> Mismatch in pdfs" <<
Endl;
2851 if (!pdfS) pdfS = fSplS;
2852 if (!pdfB) pdfB = fSplB;
2854 if (pdfS==0 || pdfB==0)
return 0.;
2863 for (
UInt_t i=0; i<nsteps; i++) {
2867 return integral*step;
2877 Double_t& max_significance_value )
const
2882 Double_t effS(0),effB(0),significance(0);
2883 TH1D *temp_histogram =
new TH1D(
"temp",
"temp", fNbinsH, fXmin, fXmax );
2885 if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2886 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<GetMaximumSignificance> "
2887 <<
"Number of signal or background events is <= 0 ==> abort"
2891 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Using ratio SignalEvents/BackgroundEvents = "
2892 << SignalEvents/BackgroundEvents <<
Endl;
2897 if ( (eff_s==0) || (eff_b==0) ) {
2898 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Efficiency histograms empty !" <<
Endl;
2899 Log() << kWARNING <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"no maximum cut found, return 0" <<
Endl;
2903 for (
Int_t bin=1; bin<=fNbinsH; bin++) {
2908 significance =
sqrt(SignalEvents)*( effS )/
sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
2918 delete temp_histogram;
2920 Log() << kINFO <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Optimal cut at : " << max_significance <<
Endl;
2921 Log() << kINFO<<
Form(
"Dataset[%s] : ",DataInfo().GetName()) <<
"Maximum significance: " << max_significance_value <<
Endl;
2923 return max_significance;
2937 Data()->SetCurrentType(treeType);
2939 Long64_t entries = Data()->GetNEvents();
2943 Log() << kFATAL <<
Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"<CalculateEstimator> Wrong tree type: " << treeType <<
Endl;
2946 UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
2962 for (
Int_t ievt = 0; ievt < entries; ievt++) {
2964 const Event* ev = GetEvent(ievt);
2969 if (DataInfo().IsSignal(ev)) {
2971 meanS += weight*theVar;
2972 rmsS += weight*theVar*theVar;
2976 meanB += weight*theVar;
2977 rmsB += weight*theVar*theVar;
2985 meanS = meanS/sumwS;
2986 meanB = meanB/sumwB;
2990 Data()->SetCurrentType(previousTreeType);
3000 if (theClassFileName ==
"")
3001 classFileName = GetWeightFileDir() +
"/" + GetJobName() +
"_" + GetMethodName() +
".class.C";
3003 classFileName = theClassFileName;
3007 TString tfname( classFileName );
3009 <<
"Creating standalone class: "
3012 std::ofstream fout( classFileName );
3014 Log() << kFATAL <<
"<MakeClass> Unable to open file: " << classFileName <<
Endl;
3019 fout <<
"// Class: " << className << std::endl;
3020 fout <<
"// Automatically generated by MethodBase::MakeClass" << std::endl <<
"//" << std::endl;
3024 fout <<
"/* configuration options =====================================================" << std::endl << std::endl;
3025 WriteStateToStream( fout );
3027 fout <<
"============================================================================ */" << std::endl;
3030 fout <<
"" << std::endl;
3031 fout <<
"#include <array>" << std::endl;
3032 fout <<
"#include <vector>" << std::endl;
3033 fout <<
"#include <cmath>" << std::endl;
3034 fout <<
"#include <string>" << std::endl;
3035 fout <<
"#include <iostream>" << std::endl;
3036 fout <<
"" << std::endl;
3039 this->MakeClassSpecificHeader( fout, className );
3041 fout <<
"#ifndef IClassifierReader__def" << std::endl;
3042 fout <<
"#define IClassifierReader__def" << std::endl;
3044 fout <<
"class IClassifierReader {" << std::endl;
3046 fout <<
" public:" << std::endl;
3048 fout <<
" // constructor" << std::endl;
3049 fout <<
" IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
3050 fout <<
" virtual ~IClassifierReader() {}" << std::endl;
3052 fout <<
" // return classifier response" << std::endl;
3054 fout <<
" virtual std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3056 fout <<
" virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3059 fout <<
" // returns classifier status" << std::endl;
3060 fout <<
" bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3062 fout <<
" protected:" << std::endl;
3064 fout <<
" bool fStatusIsClean;" << std::endl;
3065 fout <<
"};" << std::endl;
3067 fout <<
"#endif" << std::endl;
3069 fout <<
"class " << className <<
" : public IClassifierReader {" << std::endl;
3071 fout <<
" public:" << std::endl;
3073 fout <<
" // constructor" << std::endl;
3074 fout <<
" " << className <<
"( std::vector<std::string>& theInputVars )" << std::endl;
3075 fout <<
" : IClassifierReader()," << std::endl;
3076 fout <<
" fClassName( \"" << className <<
"\" )," << std::endl;
3077 fout <<
" fNvars( " << GetNvar() <<
" )" << std::endl;
3078 fout <<
" {" << std::endl;
3079 fout <<
" // the training input variables" << std::endl;
3080 fout <<
" const char* inputVars[] = { ";
3081 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3082 fout <<
"\"" << GetOriginalVarName(ivar) <<
"\"";
3083 if (ivar<GetNvar()-1) fout <<
", ";
3085 fout <<
" };" << std::endl;
3087 fout <<
" // sanity checks" << std::endl;
3088 fout <<
" if (theInputVars.size() <= 0) {" << std::endl;
3089 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3090 fout <<
" fStatusIsClean = false;" << std::endl;
3091 fout <<
" }" << std::endl;
3093 fout <<
" if (theInputVars.size() != fNvars) {" << std::endl;
3094 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3095 fout <<
" << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3096 fout <<
" fStatusIsClean = false;" << std::endl;
3097 fout <<
" }" << std::endl;
3099 fout <<
" // validate input variables" << std::endl;
3100 fout <<
" for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3101 fout <<
" if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3102 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3103 fout <<
" << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3104 fout <<
" fStatusIsClean = false;" << std::endl;
3105 fout <<
" }" << std::endl;
3106 fout <<
" }" << std::endl;
3108 fout <<
" // initialize min and max vectors (for normalisation)" << std::endl;
3109 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3110 fout <<
" fVmin[" << ivar <<
"] = " << std::setprecision(15) << GetXmin( ivar ) <<
";" << std::endl;
3111 fout <<
" fVmax[" << ivar <<
"] = " << std::setprecision(15) << GetXmax( ivar ) <<
";" << std::endl;
3114 fout <<
" // initialize input variable types" << std::endl;
3115 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3116 fout <<
" fType[" << ivar <<
"] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() <<
"\';" << std::endl;
3119 fout <<
" // initialize constants" << std::endl;
3120 fout <<
" Initialize();" << std::endl;
3122 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3123 fout <<
" // initialize transformation" << std::endl;
3124 fout <<
" InitTransform();" << std::endl;
3126 fout <<
" }" << std::endl;
3128 fout <<
" // destructor" << std::endl;
3129 fout <<
" virtual ~" << className <<
"() {" << std::endl;
3130 fout <<
" Clear(); // method-specific" << std::endl;
3131 fout <<
" }" << std::endl;
3133 fout <<
" // the classifier response" << std::endl;
3134 fout <<
" // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
3135 fout <<
" // variables given to the constructor" << std::endl;
3137 fout <<
" std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const override;" << std::endl;
3139 fout <<
" double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
3142 fout <<
" private:" << std::endl;
3144 fout <<
" // method-specific destructor" << std::endl;
3145 fout <<
" void Clear();" << std::endl;
3147 if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3148 fout <<
" // input variable transformation" << std::endl;
3149 GetTransformationHandler().MakeFunction(fout, className,1);
3150 fout <<
" void InitTransform();" << std::endl;
3151 fout <<
" void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3154 fout <<
" // common member variables" << std::endl;
3155 fout <<
" const char* fClassName;" << std::endl;
3157 fout <<
" const size_t fNvars;" << std::endl;
3158 fout <<
" size_t GetNvar() const { return fNvars; }" << std::endl;
3159 fout <<
" char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3161 fout <<
" // normalisation of input variables" << std::endl;
3162 fout <<
" double fVmin[" << GetNvar() <<
"];" << std::endl;
3163 fout <<
" double fVmax[" << GetNvar() <<
"];" << std::endl;
3164 fout <<
" double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3165 fout <<
" // normalise to output range: [-1, 1]" << std::endl;
3166 fout <<
" return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3167 fout <<
" }" << std::endl;
3169 fout <<
" // type of input variable: 'F' or 'I'" << std::endl;
3170 fout <<
" char fType[" << GetNvar() <<
"];" << std::endl;
3172 fout <<
" // initialize internal variables" << std::endl;
3173 fout <<
" void Initialize();" << std::endl;
3175 fout <<
" std::vector<double> GetMulticlassValues__( const std::vector<double>& inputValues ) const;" << std::endl;
3177 fout <<
" double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3179 fout <<
"" << std::endl;
3180 fout <<
" // private members (method specific)" << std::endl;
3183 MakeClassSpecific( fout, className );
3186 fout <<
"inline std::vector<double> " << className <<
"::GetMulticlassValues( const std::vector<double>& inputValues ) const" << std::endl;
3188 fout <<
"inline double " << className <<
"::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3190 fout <<
"{" << std::endl;
3191 fout <<
" // classifier response value" << std::endl;
3193 fout <<
" std::vector<double> retval;" << std::endl;
3195 fout <<
" double retval = 0;" << std::endl;
3198 fout <<
" // classifier response, sanity check first" << std::endl;
3199 fout <<
" if (!IsStatusClean()) {" << std::endl;
3200 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3201 fout <<
" << \" because status is dirty\" << std::endl;" << std::endl;
3202 fout <<
" }" << std::endl;
3203 fout <<
" else {" << std::endl;
3204 if (IsNormalised()) {
3205 fout <<
" // normalise variables" << std::endl;
3206 fout <<
" std::vector<double> iV;" << std::endl;
3207 fout <<
" iV.reserve(inputValues.size());" << std::endl;
3208 fout <<
" int ivar = 0;" << std::endl;
3209 fout <<
" for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3210 fout <<
" varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3211 fout <<
" iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3212 fout <<
" }" << std::endl;
3213 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() !=
Types::kLikelihood &&
3215 fout <<
" Transform( iV, -1 );" << std::endl;
3219 fout <<
" retval = GetMulticlassValues__( iV );" << std::endl;
3221 fout <<
" retval = GetMvaValue__( iV );" << std::endl;
3224 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() !=
Types::kLikelihood &&
3226 fout <<
" std::vector<double> iV(inputValues);" << std::endl;
3227 fout <<
" Transform( iV, -1 );" << std::endl;
3229 fout <<
" retval = GetMulticlassValues__( iV );" << std::endl;
3231 fout <<
" retval = GetMvaValue__( iV );" << std::endl;
3235 fout <<
" retval = GetMulticlassValues__( inputValues );" << std::endl;
3237 fout <<
" retval = GetMvaValue__( inputValues );" << std::endl;
3241 fout <<
" }" << std::endl;
3243 fout <<
" return retval;" << std::endl;
3244 fout <<
"}" << std::endl;
3247 if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3248 GetTransformationHandler().MakeFunction(fout, className,2);
3260 std::streambuf* cout_sbuf = std::cout.rdbuf();
3261 std::ofstream* o = 0;
3262 if (
gConfig().WriteOptionsReference()) {
3263 Log() << kINFO <<
"Print Help message for class " << GetName() <<
" into file: " << GetReferenceFile() <<
Endl;
3264 o =
new std::ofstream( GetReferenceFile(), std::ios::app );
3266 Log() << kFATAL <<
"<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() <<
Endl;
3268 std::cout.rdbuf( o->rdbuf() );
3273 Log() << kINFO <<
Endl;
3275 <<
"================================================================"
3279 <<
"H e l p f o r M V A m e t h o d [ " << GetName() <<
" ] :"
3284 Log() <<
"Help for MVA method [ " << GetName() <<
" ] :" <<
Endl;
3292 Log() <<
"<Suppress this message by specifying \"!H\" in the booking option>" <<
Endl;
3294 <<
"================================================================"
3301 Log() <<
"# End of Message___" <<
Endl;
3304 std::cout.rdbuf( cout_sbuf );
3319 retval = fSplRefS->Eval( theCut );
3321 else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3330 if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3331 else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3344 if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3345 return (Data()->GetEventCollection(
type));
3352 if (fEventCollections.at(idx) == 0) {
3353 fEventCollections.at(idx) = &(Data()->GetEventCollection(
type));
3354 fEventCollections.at(idx) = GetTransformationHandler().CalcTransformations(*(fEventCollections.at(idx)),
kTRUE);
3356 return *(fEventCollections.at(idx));
3364 UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000;
a>>=16;
3365 UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00;
b>>=8;
3366 UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3376 UInt_t a = GetTrainingROOTVersionCode() & 0xff0000;
a>>=16;
3377 UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00;
b>>=8;
3378 UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3389 if (mvaRes != NULL) {
3392 TH1D *mva_s_tr =
dynamic_cast<TH1D*
> (mvaRes->
GetHist(
"MVA_TRAIN_S"));
3393 TH1D *mva_b_tr =
dynamic_cast<TH1D*
> (mvaRes->
GetHist(
"MVA_TRAIN_B"));
3395 if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr)
return -1;
3397 if (SorB ==
's' || SorB ==
'S')
const Bool_t Use_Splines_for_Eff_
const Int_t NBIN_HIST_HIGH
#define ROOT_VERSION_CODE
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
#define TMVA_VERSION_CODE
Class to manage histogram axis.
virtual Int_t GetSize() const
Return the capacity of the collection, i.e. the current total amount of space that has been allocated so far.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write all objects in this collection.
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130 124559).
const char * AsString() const
Return the date & time as a string (ctime() format).
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.
Describe directory structure in memory.
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using the given path.
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
virtual Bool_t cd(const char *path=nullptr)
Change current directory to "this" directory.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
void Close(Option_t *option="") override
Close a file.
A TGraph is an object made of two arrays X and Y with npoints each.
1-D histogram with a double per channel (see TH1 documentation)
1-D histogram with a float per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum=0)
Compute quantiles for this histogram. The quantile x_q of a probability distribution function F is defined...
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
virtual void SetXTitle(const char *title)
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more information.
virtual Double_t GetMaximum(Double_t maxval=FLT_MAX) const
Return maximum value smaller than maxval of bins in the range, unless the value has been overridden by TH1::SetMaximum, in which case it returns that value.
virtual Int_t GetNbinsX() const
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see the convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
virtual Int_t GetMaximumBin() const
Return location of bin with maximum value in the range.
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
virtual void SetYTitle(const char *title)
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
virtual Double_t KolmogorovTest(const TH1 *h2, Option_t *option="") const
Statistical test of compatibility in shape between this histogram and h2, using Kolmogorov test.
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
2-D histogram with a float per channel (see TH1 documentation)
Int_t Fill(Double_t)
Invalid Fill method.
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Class that contains all the information of a class.
TString fWeightFileExtension
Int_t fMaxNumOfAllowedVariables
VariablePlotting & GetVariablePlotting()
class TMVA::Config::VariablePlotting fVariablePlotting
Class that contains all the data information.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Float_t GetTarget(UInt_t itgt) const
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Interface for all concrete MVA method implementations.
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
IPythonInteractive()
standard constructor
~IPythonInteractive()
standard destructor
void ClearGraphs()
This function sets the point number to 0 for all graphs.
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Virtual base Class for all MVA method.
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
void SetWeightFileDir(TString fileDir)
set directory of weight file
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables,...
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual Double_t GetSignificance() const
compute significance of mean difference
virtual Double_t GetProba(const Event *ev)
const char * GetName() const
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
void PrintHelpMessage() const
prints out method-specific help message
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
void SetupMethod()
setup of methods
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
virtual ~MethodBase()
destructor
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance curve for given number of signal and background events; returns cut for maximum ...
virtual Double_t GetTrainingEfficiency(const TString &)
void SetWeightFileName(TString)
set the weight file name (deprecated)
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
TString GetWeightFileName() const
retrieve weight file name
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file; dummy implementation here
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void InitBase()
default initialization called by all constructors
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
void ReadStateFromFile()
Function to read options and weights from file.
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream.
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
void SetTestvarName(const TString &v="")
void ReadVariablesFromXML(void *varnode)
read variable info from XML
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables,...
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity:
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument. This is just a dummy.
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
void AddVarsXMLTo(void *parent) const
write variable info to XML
void AddTargetsXMLTo(void *parent) const
write target info to XML
void ReadTargetsFromXML(void *tarnode)
read target info from XML
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
void ReadStateFromXML(void *parent)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
void SetSource(const std::string &source)
PDF wrapper for histograms; uses user-defined spline interpolation.
Double_t GetVal(Double_t x) const
returns value PDF(x)
Double_t GetIntegral(Double_t xmin, Double_t xmax)
computes PDF integral within given ranges
Class that is the base-class for a vector of result.
void Resize(Int_t entries)
std::vector< Float_t > * GetValueVector()
void SetValue(Float_t value, Int_t ievt, Bool_t type)
set MVA response
Class which takes the results of a multiclass classification.
TMatrixD GetConfusionMatrix(Double_t effB)
Returns a confusion matrix where each class is pitted against each other.
Float_t GetAchievablePur(UInt_t cls)
std::vector< Double_t > GetBestMultiClassCuts(UInt_t targetClass)
calculate the best working point (optimal cut values) for the multiclass classifier
void CreateMulticlassHistos(TString prefix, Int_t nbins, Int_t nbins_high)
this function fills the mva response histos for multiclass classification
Float_t GetAchievableEff(UInt_t cls)
void CreateMulticlassPerformanceHistos(TString prefix)
Create performance graphs for this classifier in a multiclass setting.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Bool_t DoesExist(const TString &alias) const
Returns true if there is an object stored in the result for a given alias, false otherwise.
TH1 * GetHist(const TString &alias) const
TList * GetStorage() const
void Store(TObject *obj, const char *alias=0)
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Linear interpolation of TGraph.
Timing information for training and evaluation of MVA methods.
Double_t ElapsedSeconds(void)
computes elapsed time in seconds
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Singleton class for Global types used by TMVA.
Class for type info of MVA input variable.
void ReadFromXML(void *varnode)
read VariableInfo from stream
const TString & GetExpression() const
void ReadFromStream(std::istream &istr)
read VariableInfo from stream
void AddToXML(void *varnode)
write class to XML
void SetExternalLink(void *p)
void * GetExternalLink() const
A TMultiGraph is a collection of TGraph (or derived) objects.
virtual const char * GetName() const
Returns name of object.
Collectable string class.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
void ToLower()
Change string to lower-case.
Int_t Atoi() const
Return integer value of string.
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
Ssiz_t Last(char c) const
Find last occurrence of a character c.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
virtual const char * GetBuildNode() const
Return the build node name.
virtual int mkdir(const char *name, Bool_t recursive=kFALSE)
Make a file system directory.
virtual const char * WorkingDirectory()
Return working directory.
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file; if layout<=0, no spaces or newlines will be placed between xmlnode...
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Double_t Sqrt(Double_t x)
Short_t Min(Short_t a, Short_t b)