134 fFitMethod ( kUseGeneticAlgorithm ),
135 fEffMethod ( kUseEventSelection ),
160 fVarHistS_smooth( 0 ),
161 fVarHistB_smooth( 0 ),
174 fFitMethod ( kUseGeneticAlgorithm ),
175 fEffMethod ( kUseEventSelection ),
200 fVarHistS_smooth( 0 ),
201 fVarHistB_smooth( 0 ),
222 fVarHistS = fVarHistB = 0;
223 fVarHistS_smooth = fVarHistB_smooth = 0;
224 fVarPdfS = fVarPdfB = 0;
226 fBinaryTreeS = fBinaryTreeB = 0;
232 fRangeSign =
new std::vector<Int_t> ( GetNvar() );
235 fMeanS =
new std::vector<Double_t>( GetNvar() );
236 fMeanB =
new std::vector<Double_t>( GetNvar() );
237 fRmsS =
new std::vector<Double_t>( GetNvar() );
238 fRmsB =
new std::vector<Double_t>( GetNvar() );
241 fFitParams =
new std::vector<EFitParameters>( GetNvar() );
244 fFitMethod = kUseMonteCarlo;
250 for (
UInt_t i=0; i<GetNvar(); i++) {
263 fTmpCutMin =
new Double_t[GetNvar()];
264 fTmpCutMax =
new Double_t[GetNvar()];
278 delete fEffBvsSLocal;
280 if (
NULL != fCutRangeMin)
delete [] fCutRangeMin;
281 if (
NULL != fCutRangeMax)
delete [] fCutRangeMax;
282 if (
NULL != fAllVarsI)
delete [] fAllVarsI;
284 for (
UInt_t i=0;i<GetNvar();i++) {
285 if (
NULL != fCutMin[i] )
delete [] fCutMin[i];
286 if (
NULL != fCutMax[i] )
delete [] fCutMax[i];
287 if (
NULL != fCutRange[i])
delete fCutRange[i];
290 if (
NULL != fCutMin)
delete [] fCutMin;
291 if (
NULL != fCutMax)
delete [] fCutMax;
293 if (
NULL != fTmpCutMin)
delete [] fTmpCutMin;
294 if (
NULL != fTmpCutMax)
delete [] fTmpCutMax;
296 if (
NULL != fBinaryTreeS)
delete fBinaryTreeS;
297 if (
NULL != fBinaryTreeB)
delete fBinaryTreeB;
321 DeclareOptionRef(fFitMethodS =
"GA",
"FitMethod",
"Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
325 AddPreDefVal(
TString(
"MCEvents"));
326 AddPreDefVal(
TString(
"MINUIT"));
327 AddPreDefVal(
TString(
"EventScan"));
330 DeclareOptionRef(fEffMethodS =
"EffSel",
"EffMethod",
"Selection Method");
331 AddPreDefVal(
TString(
"EffSel"));
332 AddPreDefVal(
TString(
"EffPDF"));
335 fCutRange.resize(GetNvar());
336 fCutRangeMin =
new Double_t[GetNvar()];
337 fCutRangeMax =
new Double_t[GetNvar()];
340 fCutRangeMin[
ivar] = fCutRangeMax[
ivar] = -1;
343 DeclareOptionRef( fCutRangeMin, GetNvar(),
"CutRangeMin",
"Minimum of allowed cut range (set per variable)" );
344 DeclareOptionRef( fCutRangeMax, GetNvar(),
"CutRangeMax",
"Maximum of allowed cut range (set per variable)" );
346 fAllVarsI =
new TString[GetNvar()];
348 for (
UInt_t i=0; i<GetNvar(); i++) fAllVarsI[i] =
"NotEnforced";
350 DeclareOptionRef(fAllVarsI, GetNvar(),
"VarProp",
"Categorisation of cuts");
351 AddPreDefVal(
TString(
"NotEnforced"));
354 AddPreDefVal(
TString(
"FSmart"));
365 if (IsNormalised()) {
366 Log() << kWARNING <<
"Normalisation of the input variables for cut optimisation is not" <<
Endl;
367 Log() << kWARNING <<
"supported because this provides intransparent cut values, and no" <<
Endl;
368 Log() << kWARNING <<
"improvement in the performance of the algorithm." <<
Endl;
369 Log() << kWARNING <<
"Please remove \"Normalise\" option from booking option string" <<
Endl;
370 Log() << kWARNING <<
"==> Will reset normalisation flag to \"False\"" <<
Endl;
374 if (IgnoreEventsWithNegWeightsInTraining()) {
375 Log() << kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: "
376 << GetMethodTypeName()
377 <<
" --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string."
381 if (fFitMethodS ==
"MC" ) fFitMethod = kUseMonteCarlo;
382 else if (fFitMethodS ==
"MCEvents") fFitMethod = kUseMonteCarloEvents;
383 else if (fFitMethodS ==
"GA" ) fFitMethod = kUseGeneticAlgorithm;
384 else if (fFitMethodS ==
"SA" ) fFitMethod = kUseSimulatedAnnealing;
385 else if (fFitMethodS ==
"MINUIT" ) {
386 fFitMethod = kUseMinuit;
387 Log() << kWARNING <<
"poor performance of MINUIT in MethodCuts; preferred fit method: GA" <<
Endl;
389 else if (fFitMethodS ==
"EventScan" ) fFitMethod = kUseEventScan;
390 else Log() << kFATAL <<
"unknown minimisation method: " << fFitMethodS <<
Endl;
392 if (fEffMethodS ==
"EFFSEL" ) fEffMethod = kUseEventSelection;
393 else if (fEffMethodS ==
"EFFPDF" ) fEffMethod = kUsePDFs;
394 else fEffMethod = kUseEventSelection;
397 Log() << kINFO <<
Form(
"Use optimization method: \"%s\"",
398 (fFitMethod == kUseMonteCarlo) ?
"Monte Carlo" :
399 (fFitMethod == kUseMonteCarlo) ?
"Monte-Carlo-Event sampling" :
400 (fFitMethod == kUseEventScan) ?
"Full Event Scan (slow)" :
401 (fFitMethod == kUseMinuit) ?
"MINUIT" :
"Genetic Algorithm" ) <<
Endl;
402 Log() << kINFO <<
Form(
"Use efficiency computation method: \"%s\"",
403 (fEffMethod == kUseEventSelection) ?
"Event Selection" :
"PDF" ) <<
Endl;
413 if (fAllVarsI[
ivar] ==
"" || fAllVarsI[
ivar] ==
"NotEnforced")
theFitP = kNotEnforced;
414 else if (fAllVarsI[
ivar] ==
"FMax" )
theFitP = kForceMax;
415 else if (fAllVarsI[
ivar] ==
"FMin" )
theFitP = kForceMin;
416 else if (fAllVarsI[
ivar] ==
"FSmart" )
theFitP = kForceSmart;
418 Log() << kFATAL <<
"unknown value \'" << fAllVarsI[
ivar]
419 <<
"\' for fit parameter option " <<
Form(
"VarProp[%i]",
ivar) <<
Endl;
424 Log() << kINFO <<
"Use \"" << fAllVarsI[
ivar]
425 <<
"\" cuts for variable: " <<
"'" << (*fInputVars)[
ivar] <<
"'" <<
Endl;
438 if (fCutMin ==
NULL || fCutMax ==
NULL || fNbins == 0) {
439 Log() << kFATAL <<
"<Eval_Cuts> fCutMin/Max have zero pointer. "
440 <<
"Did you book Cuts ?" <<
Endl;
446 if (fTestSignalEff > 0) {
448 Int_t ibin = fEffBvsSLocal->FindBin( fTestSignalEff );
450 else if (
ibin >= fNbins)
ibin = fNbins - 1;
474 std::vector<TString>*
varVec = 0;
475 if (GetTransformationHandler().GetNumOfTransformations() == 0) {
477 varVec =
new std::vector<TString>;
479 varVec->push_back( DataInfo().GetVariableInfo(
ivar).GetLabel() );
482 else if (GetTransformationHandler().GetNumOfTransformations() == 1) {
484 varVec = GetTransformationHandler().GetTransformationStringsOfLastTransform();
488 varVec =
new std::vector<TString>;
490 varVec->push_back( DataInfo().GetVariableInfo(
ivar).GetLabel() +
" [transformed]" );
502 Log() << kHEADER <<
"Cut values for requested signal efficiency: " <<
trueEffS <<
Endl;
503 Log() << kINFO <<
"Corresponding background efficiency : " << fEffBvsSLocal->GetBinContent(
ibin ) <<
Endl;
504 if (GetTransformationHandler().GetNumOfTransformations() == 1) {
505 Log() << kINFO <<
"Transformation applied to input variables : \""
506 << GetTransformationHandler().GetNameOfLastTransform() <<
"\"" <<
Endl;
508 else if (GetTransformationHandler().GetNumOfTransformations() > 1) {
509 Log() << kINFO <<
"[ More than one (=" << GetTransformationHandler().GetNumOfTransformations() <<
") "
510 <<
" transformations applied in transformation chain; cuts applied on transformed quantities ] " <<
Endl;
513 Log() << kINFO <<
"Transformation applied to input variables : None" <<
Endl;
519 <<
"Cut[" << std::setw(2) <<
ivar <<
"]: "
522 << std::setw(
maxL) << (*varVec)[
ivar]
538 std::vector<Double_t>
cMin( GetNvar() );
539 std::vector<Double_t>
cMax( GetNvar() );
552 std::vector<Double_t>&
cutMin,
553 std::vector<Double_t>&
cutMax )
const
563 else if (
ibin >= fNbins)
ibin = fNbins - 1;
580 if (fEffMethod == kUsePDFs) CreateVariablePDFs();
583 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
584 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
593 fBinaryTreeB->Fill( GetEventCollection(
Types::kTraining), fBackgroundClass );
623 delete fEffBvsSLocal;
624 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
625 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
626 fEffBvsSLocal->SetDirectory(
nullptr);
632 if (fFitMethod == kUseGeneticAlgorithm ||
633 fFitMethod == kUseMonteCarlo ||
634 fFitMethod == kUseMinuit ||
635 fFitMethod == kUseSimulatedAnnealing) {
638 std::vector<Interval*> ranges;
643 if (DataInfo().GetVariableInfo(
ivar).GetVarType() ==
'I') {
644 nbins =
Int_t(fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin()) + 1;
647 if ((*fFitParams)[
ivar] == kForceSmart) {
648 if ((*fMeanS)[
ivar] > (*fMeanB)[
ivar]) (*fFitParams)[
ivar] = kForceMax;
649 else (*fFitParams)[
ivar] = kForceMin;
652 if ((*fFitParams)[
ivar] == kForceMin) {
653 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMin(), fCutRange[
ivar]->GetMin(), nbins ) );
654 ranges.push_back(
new Interval( 0, fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(), nbins ) );
656 else if ((*fFitParams)[
ivar] == kForceMax) {
657 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMin(), fCutRange[
ivar]->GetMax(), nbins ) );
658 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(),
659 fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(), nbins ) );
662 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMin(), fCutRange[
ivar]->GetMax(), nbins ) );
663 ranges.push_back(
new Interval( 0, fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(), nbins ) );
670 switch (fFitMethod) {
671 case kUseGeneticAlgorithm:
680 case kUseSimulatedAnnealing:
684 Log() << kFATAL <<
"Wrong fit method: " << fFitMethod <<
Endl;
700 else if (fFitMethod == kUseEventScan) {
710 Log() << kINFO <<
"Running full event scan: " <<
Endl;
714 fIPyCurrentIter =
ic;
715 if (fExitFromTraining)
break;
725 else if (fFitMethod == kUseMonteCarloEvents) {
729 DeclareOptionRef(
nsamples,
"SampleSize",
"Number of Monte-Carlo-Event samples" );
730 DeclareOptionRef( seed,
"Seed",
"Seed for the random generator (0 takes random seeds)" );
743 Log() << kINFO <<
"Running Monte-Carlo-Event sampling over " <<
nsamples <<
" events" <<
Endl;
744 std::vector<Double_t> pars( 2*GetNvar() );
747 fIPyCurrentIter =
ic;
748 if (fExitFromTraining)
break;
772 Log() << kFATAL <<
"<MCEvents>: could not find signal events"
773 <<
" after 10000 trials - do you have signal events in your sample ?"
786 EstimatorFunction( pars );
796 else Log() << kFATAL <<
"Unknown minimisation method: " << fFitMethod <<
Endl;
798 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
799 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
805 if ((*fFitParams)[
ivar] == kForceMin && fCutMin[
ivar][
ibin] > -fgMaxAbsCutVal) {
806 fCutMin[
ivar][
ibin] = -fgMaxAbsCutVal;
808 if ((*fFitParams)[
ivar] == kForceMax && fCutMax[
ivar][
ibin] < fgMaxAbsCutVal) {
809 fCutMax[
ivar][
ibin] = fgMaxAbsCutVal;
820 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
837 if (!DataInfo().IsSignal(
ev1))
return -1;
840 if (!DataInfo().IsSignal(
ev2))
return -1;
842 const Int_t nvar = GetNvar();
852 std::vector<Double_t> pars;
872 return ComputeEstimator( pars );
880 return ComputeEstimator( pars );
900 this->MatchParsToCuts( pars, &fTmpCutMin[0], &fTmpCutMax[0] );
903 switch (fEffMethod) {
905 this->GetEffsfromPDFs (&fTmpCutMin[0], &fTmpCutMax[0],
effS,
effB);
907 case kUseEventSelection:
908 this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0],
effS,
effB);
911 this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0],
effS,
effB);
941 fEffBvsSLocal->SetBinContent(
ibinS,
effB );
959 diff=(fCutRange[
ivar]->GetMax()-fTmpCutMax[
ivar])/(fCutRange[
ivar]->GetMax()-fCutRange[
ivar]->GetMin());
961 diff=(fCutRange[
ivar]->GetMin()-fTmpCutMin[
ivar])/(fCutRange[
ivar]->GetMax()-fCutRange[
ivar]->GetMin());
966 else return 10.*(1.-10.*
effS);
979 cutMin[
ivar] = ((*fRangeSign)[
ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
980 cutMax[
ivar] = ((*fRangeSign)[
ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
993 const UInt_t nvar = GetNvar();
1036 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1037 fNegEffWarning =
kTRUE;
1041 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1042 fNegEffWarning =
kTRUE;
1059 nSelS = fBinaryTreeS->SearchVolume( volume );
1060 nSelB = fBinaryTreeB->SearchVolume( volume );
1065 nTotS = fBinaryTreeS->GetSumOfWeights();
1066 nTotB = fBinaryTreeB->GetSumOfWeights();
1069 if (nTotS == 0 && nTotB == 0) {
1070 Log() << kFATAL <<
"<GetEffsfromSelection> fatal error in zero total number of events:"
1071 <<
" nTotS, nTotB: " << nTotS <<
" " << nTotB <<
" ***" <<
Endl;
1078 Log() << kWARNING <<
"<ComputeEstimator> zero number of signal events" <<
Endl;
1080 else if (nTotB == 0) {
1083 Log() << kWARNING <<
"<ComputeEstimator> zero number of background events" <<
Endl;
1093 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1094 fNegEffWarning =
kTRUE;
1098 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1099 fNegEffWarning =
kTRUE;
1109 fVarHistS =
new std::vector<TH1*>( GetNvar() );
1110 fVarHistB =
new std::vector<TH1*>( GetNvar() );
1111 fVarHistS_smooth =
new std::vector<TH1*>( GetNvar() );
1112 fVarHistB_smooth =
new std::vector<TH1*>( GetNvar() );
1113 fVarPdfS =
new std::vector<PDF*>( GetNvar() );
1114 fVarPdfB =
new std::vector<PDF*>( GetNvar() );
1124 if( val > minVal ) minVal = val;
1125 if( val < maxVal ) maxVal = val;
1147 histName = (*fInputVars)[
ivar] +
"_bgd";
1163 if( DataInfo().IsSignal(
ev) ){
1164 (*fVarHistS)[
ivar]->Fill( val );
1166 (*fVarHistB)[
ivar]->Fill( val );
1173 (*fVarHistS_smooth)[
ivar] = (
TH1F*)(*fVarHistS)[
ivar]->Clone();
1174 histTitle = (*fInputVars)[
ivar] +
" signal training smoothed ";
1177 histName = (*fInputVars)[
ivar] +
"_sig_smooth";
1178 (*fVarHistS_smooth)[
ivar]->SetName(histName);
1198 histTitle = (*fInputVars)[
ivar]+
" background training smoothed ";
1201 histName = (*fInputVars)[
ivar]+
"_bgd_smooth";
1202 (*fVarHistB_smooth)[
ivar]->SetName(histName);
1223 istr >> dummy >> dummy;
1225 istr >> dummy >> fNbins;
1228 istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >>
dummyInt >> dummy ;
1231 if (
dummyInt != Data()->GetNVariables()) {
1232 Log() << kFATAL <<
"<ReadWeightsFromStream> fatal error: mismatch "
1233 <<
"in number of variables: " <<
dummyInt <<
" != " << Data()->GetNVariables() <<
Endl;
1238 if (fFitMethod == kUseMonteCarlo) {
1239 Log() << kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1241 else if (fFitMethod == kUseMonteCarloEvents) {
1242 Log() << kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1244 else if (fFitMethod == kUseGeneticAlgorithm) {
1245 Log() << kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1247 else if (fFitMethod == kUseSimulatedAnnealing) {
1248 Log() << kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1250 else if (fFitMethod == kUseEventScan) {
1251 Log() << kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1254 Log() << kWARNING <<
"unknown method: " << fFitMethod <<
Endl;
1256 Log() << kINFO <<
"in " << fNbins <<
" signal efficiency bins and for " << GetNvar() <<
" variables" <<
Endl;
1260 istr.getline(buffer,200);
1261 istr.getline(buffer,200);
1265 if (fEffBvsSLocal != 0)
delete fEffBvsSLocal;
1266 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
1267 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1268 fEffBvsSLocal->SetDirectory(
nullptr);
1279 fEffSMin = fEffBvsSLocal->GetBinCenter(1);
1280 fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);
1290 std::vector<Double_t>
cutsMin;
1291 std::vector<Double_t>
cutsMax;
1297 gTools().
AddComment(
wght,
TString::Format(
"Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]", GetNvar() ) );
1330 for (
UInt_t i=0; i<GetNvar(); i++) {
1331 if (fCutMin[i] != 0)
delete [] fCutMin[i];
1332 if (fCutMax[i] != 0)
delete [] fCutMax[i];
1334 if (fCutMin != 0)
delete [] fCutMin;
1335 if (fCutMax != 0)
delete [] fCutMax;
1346 if (fFitMethod == kUseMonteCarlo) {
1347 Log() << kINFO <<
"Read cuts optimised using sample of MC events" <<
Endl;
1349 else if (fFitMethod == kUseMonteCarloEvents) {
1350 Log() << kINFO <<
"Read cuts optimised using sample of MC-Event events" <<
Endl;
1352 else if (fFitMethod == kUseGeneticAlgorithm) {
1353 Log() << kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1355 else if (fFitMethod == kUseSimulatedAnnealing) {
1356 Log() << kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1358 else if (fFitMethod == kUseEventScan) {
1359 Log() << kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1362 Log() << kWARNING <<
"unknown method: " << fFitMethod <<
Endl;
1364 Log() << kINFO <<
"Reading " << fNbins <<
" signal efficiency bins for " << GetNvar() <<
" variables" <<
Endl;
1366 delete fEffBvsSLocal;
1367 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
1368 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1369 fEffBvsSLocal->SetDirectory(
nullptr);
1372 fCutMin =
new Double_t*[GetNvar()];
1373 fCutMax =
new Double_t*[GetNvar()];
1374 for (
UInt_t i=0;i<GetNvar();i++) {
1395 Log() << kFATAL <<
"Mismatch in bins: " <<
tmpbin-1 <<
" >= " << fNbins <<
Endl;
1413 Log() << kINFO <<
"Write monitoring histograms to file: " << BaseDir()->GetPath() <<
Endl;
1415 fEffBvsSLocal->
Write();
1418 if (fEffMethod == kUsePDFs) {
1420 (*fVarHistS)[
ivar]->Write();
1421 (*fVarHistB)[
ivar]->Write();
1422 (*fVarHistS_smooth)[
ivar]->Write();
1423 (*fVarHistB_smooth)[
ivar]->Write();
1424 (*fVarPdfS)[
ivar]->GetPDFHist()->Write();
1425 (*fVarPdfB)[
ivar]->GetPDFHist()->Write();
1447 if (list->GetSize() != 2) {
1448 Log() << kFATAL <<
"<GetTrainingEfficiency> wrong number of arguments"
1450 <<
" | required format, e.g., Efficiency:0.05" <<
Endl;
1463 if (
results->GetHist(
"EFF_BVSS_TR")==0) {
1465 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
1466 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
1471 fBinaryTreeB->Fill( GetEventCollection(
Types::kTraining), fBackgroundClass );
1479 TH1*
eff_bvss_tr =
new TH1F( GetTestvarName() +
"_trainingEffBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1481 TH1*
rej_bvss_tr =
new TH1F( GetTestvarName() +
"_trainingRejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1503 Log()<< kVERBOSE <<
"unable to fill efficiency bin " <<
bini<<
" " <<
effBin <<
Endl;
1522 if (
NULL == fSplTrainEffBvsS)
return 0.0;
1532 effB = fSplTrainEffBvsS->Eval(
effS );
1557 Data()->SetCurrentType(
type);
1564 if (list->GetSize() > 2) {
1566 Log() << kFATAL <<
"<GetEfficiency> wrong number of arguments"
1568 <<
" | required format, e.g., Efficiency:0.05, or empty string" <<
Endl;
1583 if (
results->GetHist(
"MVA_EFF_BvsS")==0) {
1585 if (fBinaryTreeS!=0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
1586 if (fBinaryTreeB!=0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
1592 fBinaryTreeS->Fill( GetEventCollection(
Types::kTesting), fSignalClass );
1594 fBinaryTreeB->Fill( GetEventCollection(
Types::kTesting), fBackgroundClass );
1603 TH1*
eff_BvsS =
new TH1F( GetTestvarName() +
"_effBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1605 TH1*
rej_BvsS =
new TH1F( GetTestvarName() +
"_rejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1613 TH1*
eff_s =
new TH1F( GetTestvarName() +
"_effS", GetTestvarName() +
" (signal)", fNbins,
xmin,
xmax);
1615 TH1*
eff_b =
new TH1F( GetTestvarName() +
"_effB", GetTestvarName() +
" (background)", fNbins,
xmin,
xmax);
1641 tmpBvsS->SetPoint(fNbins+1, 1., 1.);
1657 if (
NULL == fSpleffBvsS)
return 0.0;
1672 integral += (1.0 -
effB);
1694 if (Data()->GetNEvtSigTest() > 0)
1709 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
1710 fout <<
"};" << std::endl;
1728 Log() <<
"The optimisation of rectangular cuts performed by TMVA maximises " <<
Endl;
1729 Log() <<
"the background rejection at given signal efficiency, and scans " <<
Endl;
1730 Log() <<
"over the full range of the latter quantity. Three optimisation" <<
Endl;
1731 Log() <<
"methods are optional: Monte Carlo sampling (MC), a Genetics" <<
Endl;
1732 Log() <<
"Algorithm (GA), and Simulated Annealing (SA). GA and SA are" <<
Endl;
1733 Log() <<
"expected to perform best." <<
Endl;
1735 Log() <<
"The difficulty to find the optimal cuts strongly increases with" <<
Endl;
1736 Log() <<
"the dimensionality (number of input variables) of the problem." <<
Endl;
1737 Log() <<
"This behavior is due to the non-uniqueness of the solution space."<<
Endl;
1741 Log() <<
"If the dimensionality exceeds, say, 4 input variables, it is " <<
Endl;
1742 Log() <<
"advisable to scrutinize the separation power of the variables," <<
Endl;
1743 Log() <<
"and to remove the weakest ones. If some among the input variables" <<
Endl;
1744 Log() <<
"can be described by a single cut (e.g., because signal tends to be" <<
Endl;
1745 Log() <<
"larger than background), this can be indicated to MethodCuts via" <<
Endl;
1746 Log() <<
"the \"Fsmart\" options (see option string). Choosing this option" <<
Endl;
1747 Log() <<
"reduces the number of requirements for the variable from 2 (min/max)" <<
Endl;
1748 Log() <<
"to a single one (TMVA finds out whether it is to be interpreted as" <<
Endl;
1749 Log() <<
"min or max)." <<
Endl;
1752 Log() <<
"" <<
Endl;
1754 Log() <<
"" <<
Endl;
1755 Log() <<
"Apart form the \"Fsmart\" option for the variables, the only way" <<
Endl;
1756 Log() <<
"to improve the MC sampling is to increase the sampling rate. This" <<
Endl;
1757 Log() <<
"is done via the configuration option \"MC_NRandCuts\". The execution" <<
Endl;
1758 Log() <<
"time scales linearly with the sampling rate." <<
Endl;
1759 Log() <<
"" <<
Endl;
1761 Log() <<
"" <<
Endl;
1762 Log() <<
"The algorithm terminates if no significant fitness increase has" <<
Endl;
1763 Log() <<
"been achieved within the last \"nsteps\" steps of the calculation." <<
Endl;
1764 Log() <<
"Wiggles in the ROC curve or constant background rejection of 1" <<
Endl;
1765 Log() <<
"indicate that the GA failed to always converge at the true maximum" <<
Endl;
1766 Log() <<
"fitness. In such a case, it is recommended to broaden the search " <<
Endl;
1767 Log() <<
"by increasing the population size (\"popSize\") and to give the GA " <<
Endl;
1768 Log() <<
"more time to find improvements by increasing the number of steps" <<
Endl;
1769 Log() <<
"(\"nsteps\")" <<
Endl;
1770 Log() <<
" -> increase \"popSize\" (at least >10 * number of variables)" <<
Endl;
1771 Log() <<
" -> increase \"nsteps\"" <<
Endl;
1772 Log() <<
"" <<
Endl;
1773 Log() <<
bold <<
"Simulated Annealing (SA) algorithm:" <<
resbold <<
Endl;
1774 Log() <<
"" <<
Endl;
1775 Log() <<
"\"Increasing Adaptive\" approach:" <<
Endl;
1776 Log() <<
"" <<
Endl;
1777 Log() <<
"The algorithm seeks local minima and explores their neighborhoods, while" <<
Endl;
1778 Log() <<
"changing the ambient temperature depending on the number of failures" <<
Endl;
1779 Log() <<
"in the previous steps. The performance can be improved by increasing" <<
Endl;
1780 Log() <<
"the number of iteration steps (\"MaxCalls\"), or by adjusting the" <<
Endl;
1781 Log() <<
"minimal temperature (\"MinTemperature\"). Manual adjustments of the" <<
Endl;
1782 Log() <<
"speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" <<
Endl;
1783 Log() <<
"to individual data sets should also help. Summary:" <<
brk <<
Endl;
1784 Log() <<
" -> increase \"MaxCalls\"" <<
brk <<
Endl;
1785 Log() <<
" -> adjust \"MinTemperature\"" <<
brk <<
Endl;
1786 Log() <<
" -> adjust \"TemperatureScale\"" <<
brk <<
Endl;
1787 Log() <<
" -> adjust \"AdaptiveSpeed\"" <<
Endl;
1788 Log() <<
"" <<
Endl;
1789 Log() <<
"\"Decreasing Adaptive\" approach:" <<
Endl;
1790 Log() <<
"" <<
Endl;
1791 Log() <<
"The algorithm calculates the initial temperature (based on the effect-" <<
Endl;
1792 Log() <<
"iveness of large steps) and the multiplier that ensures to reach the" <<
Endl;
1793 Log() <<
"minimal temperature with the requested number of iteration steps." <<
Endl;
1794 Log() <<
"The performance can be improved by adjusting the minimal temperature" <<
Endl;
1795 Log() <<
" (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" <<
brk <<
Endl;
1796 Log() <<
" -> increase \"MaxCalls\"" <<
brk <<
Endl;
1797 Log() <<
" -> adjust \"MinTemperature\"" <<
Endl;
1798 Log() <<
" " <<
Endl;
1799 Log() <<
"Other kernels:" <<
Endl;
1800 Log() <<
"" <<
Endl;
1801 Log() <<
"Alternative ways of counting the temperature change are implemented. " <<
Endl;
1802 Log() <<
"Each of them starts with the maximum temperature (\"MaxTemperature\")" <<
Endl;
1803 Log() <<
"and decreases while changing the temperature according to a given" <<
Endl;
1804 Log() <<
"prescription:" <<
brk <<
Endl;
1805 Log() <<
"CurrentTemperature =" <<
brk <<
Endl;
1806 Log() <<
" - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" <<
brk <<
Endl;
1807 Log() <<
" - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" <<
brk <<
Endl;
1808 Log() <<
" - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" <<
brk <<
Endl;
1809 Log() <<
" - Sin: (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" <<
brk <<
Endl;
1810 Log() <<
" - Geo: CurrentTemperature * TemperatureScale" <<
Endl;
1811 Log() <<
"" <<
Endl;
1812 Log() <<
"Their performance can be improved by adjusting initial temperature" <<
Endl;
1813 Log() <<
"(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," <<
Endl;
1814 Log() <<
"and the multiplier that scales the temperature decrease" <<
Endl;
1815 Log() <<
"(\"TemperatureScale\")" <<
brk <<
Endl;
1816 Log() <<
" -> increase \"MaxCalls\"" <<
brk <<
Endl;
1817 Log() <<
" -> adjust \"InitialTemperature\"" <<
brk <<
Endl;
1818 Log() <<
" -> adjust \"TemperatureScale\"" <<
brk <<
Endl;
1819 Log() <<
" -> adjust \"KernelTemperature\"" <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
A TGraph is an object made of two arrays X and Y with npoints each.
1-D histogram with a float per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
A simple Binary search tree including a volume search method.
Bool_t WriteOptionsReference() const
void CheckForUnusedOptions() const
checks for unused options in option string
Class that contains all the data information.
Base class for TMVA fitters.
void SetIPythonInteractive(bool *ExitFromTraining, UInt_t *fIPyMaxIter_, UInt_t *fIPyCurrentIter_)
Double_t Run()
estimator function interface for fitting
Fitter using a Genetic Algorithm.
The TMVA::Interval Class.
Fitter using Monte Carlo sampling of parameters.
Virtual base Class for all MVA method.
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular ...
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA.
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
void DeclareOptions()
define the options (their key words) that can be set in the option string.
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
void GetHelpMessage() const
get help message text
void Train(void)
training method: here the cuts are optimised for the training sample
static const Double_t fgMaxAbsCutVal
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
void Init(void)
default initialisation called by all constructors
Double_t GetTrainingEfficiency(const TString &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
void ProcessOptions()
process user options.
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
virtual ~MethodCuts(void)
destructor
void TestClassification()
nothing to test
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
void PrintCuts(Double_t effS) const
print cuts
PDF wrapper for histograms; uses user-defined spline interpolation.
Class that is the base-class for a vector of result.
Fitter using a Simulated Annealing Algorithm.
Linear interpolation of TGraph.
Timing information for training and evaluation of MVA methods.
Singleton class for Global types used by TMVA.
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Volume for BinarySearchTree.
Collectable string class.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Random number generator class based on M.
const char * Data() const
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
void Clone(Ssiz_t nc)
Make self a distinct copy with capacity of at least tot, where tot cannot be smaller than the current...
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.