133 fFitMethod ( kUseGeneticAlgorithm ),
134 fEffMethod ( kUseEventSelection ),
159 fVarHistS_smooth( 0 ),
160 fVarHistB_smooth( 0 ),
173 fFitMethod ( kUseGeneticAlgorithm ),
174 fEffMethod ( kUseEventSelection ),
199 fVarHistS_smooth( 0 ),
200 fVarHistB_smooth( 0 ),
221 fVarHistS = fVarHistB = 0;
222 fVarHistS_smooth = fVarHistB_smooth = 0;
223 fVarPdfS = fVarPdfB = 0;
225 fBinaryTreeS = fBinaryTreeB = 0;
231 fRangeSign =
new std::vector<Int_t> ( GetNvar() );
234 fMeanS =
new std::vector<Double_t>( GetNvar() );
235 fMeanB =
new std::vector<Double_t>( GetNvar() );
236 fRmsS =
new std::vector<Double_t>( GetNvar() );
237 fRmsB =
new std::vector<Double_t>( GetNvar() );
240 fFitParams =
new std::vector<EFitParameters>( GetNvar() );
243 fFitMethod = kUseMonteCarlo;
249 for (
UInt_t i=0; i<GetNvar(); i++) {
262 fTmpCutMin =
new Double_t[GetNvar()];
263 fTmpCutMax =
new Double_t[GetNvar()];
277 delete fEffBvsSLocal;
279 if (
NULL != fCutRangeMin)
delete [] fCutRangeMin;
280 if (
NULL != fCutRangeMax)
delete [] fCutRangeMax;
281 if (
NULL != fAllVarsI)
delete [] fAllVarsI;
283 for (
UInt_t i=0;i<GetNvar();i++) {
284 if (
NULL != fCutMin[i] )
delete [] fCutMin[i];
285 if (
NULL != fCutMax[i] )
delete [] fCutMax[i];
286 if (
NULL != fCutRange[i])
delete fCutRange[i];
289 if (
NULL != fCutMin)
delete [] fCutMin;
290 if (
NULL != fCutMax)
delete [] fCutMax;
292 if (
NULL != fTmpCutMin)
delete [] fTmpCutMin;
293 if (
NULL != fTmpCutMax)
delete [] fTmpCutMax;
295 if (
NULL != fBinaryTreeS)
delete fBinaryTreeS;
296 if (
NULL != fBinaryTreeB)
delete fBinaryTreeB;
320 DeclareOptionRef(fFitMethodS =
"GA",
"FitMethod",
"Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
324 AddPreDefVal(
TString(
"MCEvents"));
325 AddPreDefVal(
TString(
"MINUIT"));
326 AddPreDefVal(
TString(
"EventScan"));
329 DeclareOptionRef(fEffMethodS =
"EffSel",
"EffMethod",
"Selection Method");
330 AddPreDefVal(
TString(
"EffSel"));
331 AddPreDefVal(
TString(
"EffPDF"));
334 fCutRange.resize(GetNvar());
335 fCutRangeMin =
new Double_t[GetNvar()];
336 fCutRangeMax =
new Double_t[GetNvar()];
339 fCutRangeMin[
ivar] = fCutRangeMax[
ivar] = -1;
342 DeclareOptionRef( fCutRangeMin, GetNvar(),
"CutRangeMin",
"Minimum of allowed cut range (set per variable)" );
343 DeclareOptionRef( fCutRangeMax, GetNvar(),
"CutRangeMax",
"Maximum of allowed cut range (set per variable)" );
345 fAllVarsI =
new TString[GetNvar()];
347 for (
UInt_t i=0; i<GetNvar(); i++) fAllVarsI[i] =
"NotEnforced";
349 DeclareOptionRef(fAllVarsI, GetNvar(),
"VarProp",
"Categorisation of cuts");
350 AddPreDefVal(
TString(
"NotEnforced"));
353 AddPreDefVal(
TString(
"FSmart"));
364 if (IsNormalised()) {
365 Log() << kWARNING <<
"Normalisation of the input variables for cut optimisation is not" <<
Endl;
366 Log() << kWARNING <<
"supported because this provides intransparent cut values, and no" <<
Endl;
367 Log() << kWARNING <<
"improvement in the performance of the algorithm." <<
Endl;
368 Log() << kWARNING <<
"Please remove \"Normalise\" option from booking option string" <<
Endl;
369 Log() << kWARNING <<
"==> Will reset normalisation flag to \"False\"" <<
Endl;
373 if (IgnoreEventsWithNegWeightsInTraining()) {
374 Log() << kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: "
375 << GetMethodTypeName()
376 <<
" --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string."
380 if (fFitMethodS ==
"MC" ) fFitMethod = kUseMonteCarlo;
381 else if (fFitMethodS ==
"MCEvents") fFitMethod = kUseMonteCarloEvents;
382 else if (fFitMethodS ==
"GA" ) fFitMethod = kUseGeneticAlgorithm;
383 else if (fFitMethodS ==
"SA" ) fFitMethod = kUseSimulatedAnnealing;
384 else if (fFitMethodS ==
"MINUIT" ) {
385 fFitMethod = kUseMinuit;
386 Log() << kWARNING <<
"poor performance of MINUIT in MethodCuts; preferred fit method: GA" <<
Endl;
388 else if (fFitMethodS ==
"EventScan" ) fFitMethod = kUseEventScan;
389 else Log() << kFATAL <<
"unknown minimisation method: " << fFitMethodS <<
Endl;
391 if (fEffMethodS ==
"EFFSEL" ) fEffMethod = kUseEventSelection;
392 else if (fEffMethodS ==
"EFFPDF" ) fEffMethod = kUsePDFs;
393 else fEffMethod = kUseEventSelection;
// Report which optimisation method was selected.
// The second test must be kUseMonteCarloEvents: repeating kUseMonteCarlo made the
// "Monte-Carlo-Event sampling" label unreachable. Simulated annealing gets its own
// label so that the final fallback is only reached for the genetic algorithm.
396 Log() << kINFO <<
Form(
"Use optimization method: \"%s\"",
397 (fFitMethod == kUseMonteCarlo) ?
"Monte Carlo" :
398 (fFitMethod == kUseMonteCarloEvents) ?
"Monte-Carlo-Event sampling" :
399 (fFitMethod == kUseEventScan) ?
"Full Event Scan (slow)" :
400 (fFitMethod == kUseMinuit) ?
"MINUIT" :
(fFitMethod == kUseSimulatedAnnealing) ?
"Simulated Annealing" :
"Genetic Algorithm" ) <<
Endl;
401 Log() << kINFO <<
Form(
"Use efficiency computation method: \"%s\"",
402 (fEffMethod == kUseEventSelection) ?
"Event Selection" :
"PDF" ) <<
Endl;
412 if (fAllVarsI[
ivar] ==
"" || fAllVarsI[
ivar] ==
"NotEnforced")
theFitP = kNotEnforced;
413 else if (fAllVarsI[
ivar] ==
"FMax" )
theFitP = kForceMax;
414 else if (fAllVarsI[
ivar] ==
"FMin" )
theFitP = kForceMin;
415 else if (fAllVarsI[
ivar] ==
"FSmart" )
theFitP = kForceSmart;
417 Log() << kFATAL <<
"unknown value \'" << fAllVarsI[
ivar]
418 <<
"\' for fit parameter option " <<
Form(
"VarProp[%i]",
ivar) <<
Endl;
423 Log() << kINFO <<
"Use \"" << fAllVarsI[
ivar]
424 <<
"\" cuts for variable: " <<
"'" << (*fInputVars)[
ivar] <<
"'" <<
Endl;
437 if (fCutMin ==
NULL || fCutMax ==
NULL || fNbins == 0) {
438 Log() << kFATAL <<
"<Eval_Cuts> fCutMin/Max have zero pointer. "
439 <<
"Did you book Cuts ?" <<
Endl;
445 if (fTestSignalEff > 0) {
447 Int_t ibin = fEffBvsSLocal->FindBin( fTestSignalEff );
449 else if (
ibin >= fNbins)
ibin = fNbins - 1;
473 std::vector<TString>*
varVec = 0;
474 if (GetTransformationHandler().GetNumOfTransformations() == 0) {
476 varVec =
new std::vector<TString>;
478 varVec->push_back( DataInfo().GetVariableInfo(
ivar).GetLabel() );
481 else if (GetTransformationHandler().GetNumOfTransformations() == 1) {
483 varVec = GetTransformationHandler().GetTransformationStringsOfLastTransform();
487 varVec =
new std::vector<TString>;
489 varVec->push_back( DataInfo().GetVariableInfo(
ivar).GetLabel() +
" [transformed]" );
501 Log() << kHEADER <<
"Cut values for requested signal efficiency: " <<
trueEffS <<
Endl;
502 Log() << kINFO <<
"Corresponding background efficiency : " << fEffBvsSLocal->GetBinContent(
ibin ) <<
Endl;
503 if (GetTransformationHandler().GetNumOfTransformations() == 1) {
504 Log() << kINFO <<
"Transformation applied to input variables : \""
505 << GetTransformationHandler().GetNameOfLastTransform() <<
"\"" <<
Endl;
507 else if (GetTransformationHandler().GetNumOfTransformations() > 1) {
508 Log() << kINFO <<
"[ More than one (=" << GetTransformationHandler().GetNumOfTransformations() <<
") "
509 <<
" transformations applied in transformation chain; cuts applied on transformed quantities ] " <<
Endl;
512 Log() << kINFO <<
"Transformation applied to input variables : None" <<
Endl;
518 <<
"Cut[" << std::setw(2) <<
ivar <<
"]: "
521 << std::setw(
maxL) << (*varVec)[
ivar]
537 std::vector<Double_t>
cMin( GetNvar() );
538 std::vector<Double_t>
cMax( GetNvar() );
551 std::vector<Double_t>&
cutMin,
552 std::vector<Double_t>&
cutMax )
const
562 else if (
ibin >= fNbins)
ibin = fNbins - 1;
579 if (fEffMethod == kUsePDFs) CreateVariablePDFs();
582 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
583 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
592 fBinaryTreeB->Fill( GetEventCollection(
Types::kTraining), fBackgroundClass );
622 delete fEffBvsSLocal;
623 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
624 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
625 fEffBvsSLocal->SetDirectory(
nullptr);
631 if (fFitMethod == kUseGeneticAlgorithm ||
632 fFitMethod == kUseMonteCarlo ||
633 fFitMethod == kUseMinuit ||
634 fFitMethod == kUseSimulatedAnnealing) {
637 std::vector<Interval*> ranges;
642 if (DataInfo().GetVariableInfo(
ivar).GetVarType() ==
'I') {
643 nbins =
Int_t(fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin()) + 1;
646 if ((*fFitParams)[
ivar] == kForceSmart) {
647 if ((*fMeanS)[
ivar] > (*fMeanB)[
ivar]) (*fFitParams)[
ivar] = kForceMax;
648 else (*fFitParams)[
ivar] = kForceMin;
651 if ((*fFitParams)[
ivar] == kForceMin) {
652 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMin(), fCutRange[
ivar]->GetMin(), nbins ) );
653 ranges.push_back(
new Interval( 0, fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(), nbins ) );
655 else if ((*fFitParams)[
ivar] == kForceMax) {
656 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMin(), fCutRange[
ivar]->GetMax(), nbins ) );
657 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(),
658 fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(), nbins ) );
661 ranges.push_back(
new Interval( fCutRange[
ivar]->GetMin(), fCutRange[
ivar]->GetMax(), nbins ) );
662 ranges.push_back(
new Interval( 0, fCutRange[
ivar]->GetMax() - fCutRange[
ivar]->GetMin(), nbins ) );
669 switch (fFitMethod) {
670 case kUseGeneticAlgorithm:
679 case kUseSimulatedAnnealing:
683 Log() << kFATAL <<
"Wrong fit method: " << fFitMethod <<
Endl;
699 else if (fFitMethod == kUseEventScan) {
709 Log() << kINFO <<
"Running full event scan: " <<
Endl;
713 fIPyCurrentIter =
ic;
714 if (fExitFromTraining)
break;
724 else if (fFitMethod == kUseMonteCarloEvents) {
728 DeclareOptionRef(
nsamples,
"SampleSize",
"Number of Monte-Carlo-Event samples" );
729 DeclareOptionRef( seed,
"Seed",
"Seed for the random generator (0 takes random seeds)" );
742 Log() << kINFO <<
"Running Monte-Carlo-Event sampling over " <<
nsamples <<
" events" <<
Endl;
743 std::vector<Double_t> pars( 2*GetNvar() );
746 fIPyCurrentIter =
ic;
747 if (fExitFromTraining)
break;
771 Log() << kFATAL <<
"<MCEvents>: could not find signal events"
772 <<
" after 10000 trials - do you have signal events in your sample ?"
785 EstimatorFunction( pars );
795 else Log() << kFATAL <<
"Unknown minimisation method: " << fFitMethod <<
Endl;
797 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
798 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
804 if ((*fFitParams)[
ivar] == kForceMin && fCutMin[
ivar][
ibin] > -fgMaxAbsCutVal) {
805 fCutMin[
ivar][
ibin] = -fgMaxAbsCutVal;
807 if ((*fFitParams)[
ivar] == kForceMax && fCutMax[
ivar][
ibin] < fgMaxAbsCutVal) {
808 fCutMax[
ivar][
ibin] = fgMaxAbsCutVal;
817 for (
Double_t eff=0.1; eff<0.95; eff += 0.1) PrintCuts( eff+epsilon );
819 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
836 if (!DataInfo().IsSignal(
ev1))
return -1;
839 if (!DataInfo().IsSignal(
ev2))
return -1;
841 const Int_t nvar = GetNvar();
851 std::vector<Double_t> pars;
871 return ComputeEstimator( pars );
879 return ComputeEstimator( pars );
899 this->MatchParsToCuts( pars, &fTmpCutMin[0], &fTmpCutMax[0] );
902 switch (fEffMethod) {
904 this->GetEffsfromPDFs (&fTmpCutMin[0], &fTmpCutMax[0],
effS,
effB);
906 case kUseEventSelection:
907 this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0],
effS,
effB);
910 this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0],
effS,
effB);
940 fEffBvsSLocal->SetBinContent(
ibinS,
effB );
958 diff=(fCutRange[
ivar]->GetMax()-fTmpCutMax[
ivar])/(fCutRange[
ivar]->GetMax()-fCutRange[
ivar]->GetMin());
960 diff=(fCutRange[
ivar]->GetMin()-fTmpCutMin[
ivar])/(fCutRange[
ivar]->GetMax()-fCutRange[
ivar]->GetMin());
965 else return 10.*(1.-10.*
effS);
978 cutMin[
ivar] = ((*fRangeSign)[
ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
979 cutMax[
ivar] = ((*fRangeSign)[
ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
992 const UInt_t nvar = GetNvar();
1035 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1036 fNegEffWarning =
kTRUE;
1040 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1041 fNegEffWarning =
kTRUE;
1058 nSelS = fBinaryTreeS->SearchVolume( volume );
1059 nSelB = fBinaryTreeB->SearchVolume( volume );
1064 nTotS = fBinaryTreeS->GetSumOfWeights();
1065 nTotB = fBinaryTreeB->GetSumOfWeights();
1068 if (nTotS == 0 && nTotB == 0) {
1069 Log() << kFATAL <<
"<GetEffsfromSelection> fatal error in zero total number of events:"
1070 <<
" nTotS, nTotB: " << nTotS <<
" " << nTotB <<
" ***" <<
Endl;
1077 Log() << kWARNING <<
"<ComputeEstimator> zero number of signal events" <<
Endl;
1079 else if (nTotB == 0) {
1082 Log() << kWARNING <<
"<ComputeEstimator> zero number of background events" <<
Endl;
1092 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1093 fNegEffWarning =
kTRUE;
1097 if( !fNegEffWarning ) Log() << kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1098 fNegEffWarning =
kTRUE;
1108 fVarHistS =
new std::vector<TH1*>( GetNvar() );
1109 fVarHistB =
new std::vector<TH1*>( GetNvar() );
1110 fVarHistS_smooth =
new std::vector<TH1*>( GetNvar() );
1111 fVarHistB_smooth =
new std::vector<TH1*>( GetNvar() );
1112 fVarPdfS =
new std::vector<PDF*>( GetNvar() );
1113 fVarPdfB =
new std::vector<PDF*>( GetNvar() );
1123 if( val > minVal ) minVal = val;
1124 if( val < maxVal ) maxVal = val;
1146 histName = (*fInputVars)[
ivar] +
"_bgd";
1162 if( DataInfo().IsSignal(
ev) ){
1163 (*fVarHistS)[
ivar]->Fill( val );
1165 (*fVarHistB)[
ivar]->Fill( val );
1172 (*fVarHistS_smooth)[
ivar] = (
TH1F*)(*fVarHistS)[
ivar]->Clone();
1173 histTitle = (*fInputVars)[
ivar] +
" signal training smoothed ";
1176 histName = (*fInputVars)[
ivar] +
"_sig_smooth";
1177 (*fVarHistS_smooth)[
ivar]->SetName(histName);
1197 histTitle = (*fInputVars)[
ivar]+
" background training smoothed ";
1200 histName = (*fInputVars)[
ivar]+
"_bgd_smooth";
1201 (*fVarHistB_smooth)[
ivar]->SetName(histName);
1222 istr >> dummy >> dummy;
1224 istr >> dummy >> fNbins;
1227 istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >>
dummyInt >> dummy ;
1230 if (
dummyInt != Data()->GetNVariables()) {
1231 Log() << kFATAL <<
"<ReadWeightsFromStream> fatal error: mismatch "
1232 <<
"in number of variables: " <<
dummyInt <<
" != " << Data()->GetNVariables() <<
Endl;
1237 if (fFitMethod == kUseMonteCarlo) {
1238 Log() << kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1240 else if (fFitMethod == kUseMonteCarloEvents) {
1241 Log() << kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1243 else if (fFitMethod == kUseGeneticAlgorithm) {
1244 Log() << kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1246 else if (fFitMethod == kUseSimulatedAnnealing) {
1247 Log() << kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1249 else if (fFitMethod == kUseEventScan) {
1250 Log() << kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1253 Log() << kWARNING <<
"unknown method: " << fFitMethod <<
Endl;
1255 Log() << kINFO <<
"in " << fNbins <<
" signal efficiency bins and for " << GetNvar() <<
" variables" <<
Endl;
1259 istr.getline(buffer,200);
1260 istr.getline(buffer,200);
1264 if (fEffBvsSLocal != 0)
delete fEffBvsSLocal;
1265 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
1266 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1267 fEffBvsSLocal->SetDirectory(
nullptr);
1278 fEffSMin = fEffBvsSLocal->GetBinCenter(1);
1279 fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);
1289 std::vector<Double_t>
cutsMin;
1290 std::vector<Double_t>
cutsMax;
1296 gTools().
AddComment(
wght,
TString::Format(
"Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]", GetNvar() ) );
1329 for (
UInt_t i=0; i<GetNvar(); i++) {
1330 if (fCutMin[i] != 0)
delete [] fCutMin[i];
1331 if (fCutMax[i] != 0)
delete [] fCutMax[i];
1333 if (fCutMin != 0)
delete [] fCutMin;
1334 if (fCutMax != 0)
delete [] fCutMax;
1345 if (fFitMethod == kUseMonteCarlo) {
1346 Log() << kINFO <<
"Read cuts optimised using sample of MC events" <<
Endl;
1348 else if (fFitMethod == kUseMonteCarloEvents) {
1349 Log() << kINFO <<
"Read cuts optimised using sample of MC-Event events" <<
Endl;
1351 else if (fFitMethod == kUseGeneticAlgorithm) {
1352 Log() << kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1354 else if (fFitMethod == kUseSimulatedAnnealing) {
1355 Log() << kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1357 else if (fFitMethod == kUseEventScan) {
1358 Log() << kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1361 Log() << kWARNING <<
"unknown method: " << fFitMethod <<
Endl;
1363 Log() << kINFO <<
"Reading " << fNbins <<
" signal efficiency bins for " << GetNvar() <<
" variables" <<
Endl;
1365 delete fEffBvsSLocal;
1366 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
1367 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1368 fEffBvsSLocal->SetDirectory(
nullptr);
1371 fCutMin =
new Double_t*[GetNvar()];
1372 fCutMax =
new Double_t*[GetNvar()];
1373 for (
UInt_t i=0;i<GetNvar();i++) {
1394 Log() << kFATAL <<
"Mismatch in bins: " <<
tmpbin-1 <<
" >= " << fNbins <<
Endl;
1412 Log() << kINFO <<
"Write monitoring histograms to file: " << BaseDir()->GetPath() <<
Endl;
1414 fEffBvsSLocal->
Write();
1417 if (fEffMethod == kUsePDFs) {
1419 (*fVarHistS)[
ivar]->Write();
1420 (*fVarHistB)[
ivar]->Write();
1421 (*fVarHistS_smooth)[
ivar]->Write();
1422 (*fVarHistB_smooth)[
ivar]->Write();
1423 (*fVarPdfS)[
ivar]->GetPDFHist()->Write();
1424 (*fVarPdfB)[
ivar]->GetPDFHist()->Write();
1446 if (list->GetSize() != 2) {
1447 Log() << kFATAL <<
"<GetTrainingEfficiency> wrong number of arguments"
1449 <<
" | required format, e.g., Efficiency:0.05" <<
Endl;
1462 if (
results->GetHist(
"EFF_BVSS_TR")==0) {
1464 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
1465 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
1470 fBinaryTreeB->Fill( GetEventCollection(
Types::kTraining), fBackgroundClass );
1478 TH1*
eff_bvss_tr =
new TH1F( GetTestvarName() +
"_trainingEffBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1480 TH1*
rej_bvss_tr =
new TH1F( GetTestvarName() +
"_trainingRejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1502 Log()<< kVERBOSE <<
"unable to fill efficiency bin " <<
bini<<
" " <<
effBin <<
Endl;
1521 if (
NULL == fSplTrainEffBvsS)
return 0.0;
1531 effB = fSplTrainEffBvsS->Eval(
effS );
1556 Data()->SetCurrentType(
type);
1563 if (list->GetSize() > 2) {
1565 Log() << kFATAL <<
"<GetEfficiency> wrong number of arguments"
1567 <<
" | required format, e.g., Efficiency:0.05, or empty string" <<
Endl;
1582 if (
results->GetHist(
"MVA_EFF_BvsS")==0) {
1584 if (fBinaryTreeS!=0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
1585 if (fBinaryTreeB!=0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
1591 fBinaryTreeS->Fill( GetEventCollection(
Types::kTesting), fSignalClass );
1593 fBinaryTreeB->Fill( GetEventCollection(
Types::kTesting), fBackgroundClass );
1602 TH1*
eff_BvsS =
new TH1F( GetTestvarName() +
"_effBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1604 TH1*
rej_BvsS =
new TH1F( GetTestvarName() +
"_rejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1612 TH1*
eff_s =
new TH1F( GetTestvarName() +
"_effS", GetTestvarName() +
" (signal)", fNbins,
xmin,
xmax);
1614 TH1*
eff_b =
new TH1F( GetTestvarName() +
"_effB", GetTestvarName() +
" (background)", fNbins,
xmin,
xmax);
1640 tmpBvsS->SetPoint(fNbins+1, 1., 1.);
1656 if (
NULL == fSpleffBvsS)
return 0.0;
1671 integral += (1.0 -
effB);
1693 if (Data()->GetNEvtSigTest() > 0)
1708 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
1709 fout <<
"};" << std::endl;
1727 Log() <<
"The optimisation of rectangular cuts performed by TMVA maximises " <<
Endl;
1728 Log() <<
"the background rejection at given signal efficiency, and scans " <<
Endl;
1729 Log() <<
"over the full range of the latter quantity. Three optimisation" <<
Endl;
1730 Log() <<
"methods are optional: Monte Carlo sampling (MC), a Genetics" <<
Endl;
1731 Log() <<
"Algorithm (GA), and Simulated Annealing (SA). GA and SA are" <<
Endl;
1732 Log() <<
"expected to perform best." <<
Endl;
1734 Log() <<
"The difficulty to find the optimal cuts strongly increases with" <<
Endl;
1735 Log() <<
"the dimensionality (number of input variables) of the problem." <<
Endl;
1736 Log() <<
"This behavior is due to the non-uniqueness of the solution space."<<
Endl;
1740 Log() <<
"If the dimensionality exceeds, say, 4 input variables, it is " <<
Endl;
1741 Log() <<
"advisable to scrutinize the separation power of the variables," <<
Endl;
1742 Log() <<
"and to remove the weakest ones. If some among the input variables" <<
Endl;
1743 Log() <<
"can be described by a single cut (e.g., because signal tends to be" <<
Endl;
1744 Log() <<
"larger than background), this can be indicated to MethodCuts via" <<
Endl;
1745 Log() <<
"the \"Fsmart\" options (see option string). Choosing this option" <<
Endl;
1746 Log() <<
"reduces the number of requirements for the variable from 2 (min/max)" <<
Endl;
1747 Log() <<
"to a single one (TMVA finds out whether it is to be interpreted as" <<
Endl;
1748 Log() <<
"min or max)." <<
Endl;
1751 Log() <<
"" <<
Endl;
1753 Log() <<
"" <<
Endl;
1754 Log() <<
"Apart form the \"Fsmart\" option for the variables, the only way" <<
Endl;
1755 Log() <<
"to improve the MC sampling is to increase the sampling rate. This" <<
Endl;
1756 Log() <<
"is done via the configuration option \"MC_NRandCuts\". The execution" <<
Endl;
1757 Log() <<
"time scales linearly with the sampling rate." <<
Endl;
1758 Log() <<
"" <<
Endl;
1760 Log() <<
"" <<
Endl;
1761 Log() <<
"The algorithm terminates if no significant fitness increase has" <<
Endl;
1762 Log() <<
"been achieved within the last \"nsteps\" steps of the calculation." <<
Endl;
1763 Log() <<
"Wiggles in the ROC curve or constant background rejection of 1" <<
Endl;
1764 Log() <<
"indicate that the GA failed to always converge at the true maximum" <<
Endl;
1765 Log() <<
"fitness. In such a case, it is recommended to broaden the search " <<
Endl;
1766 Log() <<
"by increasing the population size (\"popSize\") and to give the GA " <<
Endl;
1767 Log() <<
"more time to find improvements by increasing the number of steps" <<
Endl;
1768 Log() <<
"(\"nsteps\")" <<
Endl;
1769 Log() <<
" -> increase \"popSize\" (at least >10 * number of variables)" <<
Endl;
1770 Log() <<
" -> increase \"nsteps\"" <<
Endl;
1771 Log() <<
"" <<
Endl;
1772 Log() <<
bold <<
"Simulated Annealing (SA) algorithm:" <<
resbold <<
Endl;
1773 Log() <<
"" <<
Endl;
1774 Log() <<
"\"Increasing Adaptive\" approach:" <<
Endl;
1775 Log() <<
"" <<
Endl;
1776 Log() <<
"The algorithm seeks local minima and explores their neighborhoods, while" <<
Endl;
1777 Log() <<
"changing the ambient temperature depending on the number of failures" <<
Endl;
1778 Log() <<
"in the previous steps. The performance can be improved by increasing" <<
Endl;
1779 Log() <<
"the number of iteration steps (\"MaxCalls\"), or by adjusting the" <<
Endl;
1780 Log() <<
"minimal temperature (\"MinTemperature\"). Manual adjustments of the" <<
Endl;
1781 Log() <<
"speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" <<
Endl;
1782 Log() <<
"to individual data sets should also help. Summary:" <<
brk <<
Endl;
1783 Log() <<
" -> increase \"MaxCalls\"" <<
brk <<
Endl;
1784 Log() <<
" -> adjust \"MinTemperature\"" <<
brk <<
Endl;
1785 Log() <<
" -> adjust \"TemperatureScale\"" <<
brk <<
Endl;
1786 Log() <<
" -> adjust \"AdaptiveSpeed\"" <<
Endl;
1787 Log() <<
"" <<
Endl;
1788 Log() <<
"\"Decreasing Adaptive\" approach:" <<
Endl;
1789 Log() <<
"" <<
Endl;
1790 Log() <<
"The algorithm calculates the initial temperature (based on the effect-" <<
Endl;
1791 Log() <<
"iveness of large steps) and the multiplier that ensures to reach the" <<
Endl;
1792 Log() <<
"minimal temperature with the requested number of iteration steps." <<
Endl;
1793 Log() <<
"The performance can be improved by adjusting the minimal temperature" <<
Endl;
1794 Log() <<
" (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" <<
brk <<
Endl;
1795 Log() <<
" -> increase \"MaxCalls\"" <<
brk <<
Endl;
1796 Log() <<
" -> adjust \"MinTemperature\"" <<
Endl;
1797 Log() <<
" " <<
Endl;
1798 Log() <<
"Other kernels:" <<
Endl;
1799 Log() <<
"" <<
Endl;
1800 Log() <<
"Alternative ways of counting the temperature change are implemented. " <<
Endl;
1801 Log() <<
"Each of them starts with the maximum temperature (\"MaxTemperature\")" <<
Endl;
1802 Log() <<
"and decreases while changing the temperature according to a given" <<
Endl;
1803 Log() <<
"prescription:" <<
brk <<
Endl;
1804 Log() <<
"CurrentTemperature =" <<
brk <<
Endl;
1805 Log() <<
" - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" <<
brk <<
Endl;
1806 Log() <<
" - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" <<
brk <<
Endl;
1807 Log() <<
" - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" <<
brk <<
Endl;
1808 Log() <<
" - Sin: (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" <<
brk <<
Endl;
1809 Log() <<
" - Geo: CurrentTemperature * TemperatureScale" <<
Endl;
1810 Log() <<
"" <<
Endl;
1811 Log() <<
"Their performance can be improved by adjusting initial temperature" <<
Endl;
1812 Log() <<
"(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," <<
Endl;
1813 Log() <<
"and the multiplier that scales the temperature decrease" <<
Endl;
1814 Log() <<
"(\"TemperatureScale\")" <<
brk <<
Endl;
1815 Log() <<
" -> increase \"MaxCalls\"" <<
brk <<
Endl;
1816 Log() <<
" -> adjust \"InitialTemperature\"" <<
brk <<
Endl;
1817 Log() <<
" -> adjust \"TemperatureScale\"" <<
brk <<
Endl;
1818 Log() <<
" -> adjust \"KernelTemperature\"" <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
int Int_t
Signed integer 4 bytes (int)
float Float_t
Float 4 bytes (float)
double Double_t
Double 8 bytes.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
A TGraph is an object made of two arrays X and Y with npoints each.
1-D histogram with a float per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
A simple Binary search tree including a volume search method.
Bool_t WriteOptionsReference() const
void CheckForUnusedOptions() const
checks for unused options in option string
Class that contains all the data information.
Base class for TMVA fitters.
void SetIPythonInteractive(bool *ExitFromTraining, UInt_t *fIPyMaxIter_, UInt_t *fIPyCurrentIter_)
Double_t Run()
estimator function interface for fitting
Fitter using a Genetic Algorithm.
The TMVA::Interval Class.
Fitter using Monte Carlo sampling of parameters.
Virtual base class for all MVA methods.
Double_t EstimatorFunction(std::vector< Double_t > &) override
returns estimator for "cut fitness" used by GA
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA.
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
void AddWeightsXMLTo(void *parent) const override
create XML description for LD classification and regression (for arbitrary number of output classes/t...
void DeclareOptions() override
define the options (their key words) that can be set in the option string.
void Train(void) override
training method: here the cuts are optimised for the training sample
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
Double_t GetTrainingEfficiency(const TString &) override
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
void ProcessOptions() override
process user options.
static const Double_t fgMaxAbsCutVal
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
Cuts can only handle classification with 2 classes.
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
void ReadWeightsFromStream(std::istream &i) override
read the cuts from stream
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample
void TestClassification() override
nothing to test
void WriteMonitoringHistosToFile(void) const override
write histograms and PDFs to file for monitoring purposes
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
virtual ~MethodCuts(void)
destructor
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &) override
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
void Init(void) override
default initialisation called by all constructors
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
void ReadWeightsFromXML(void *wghtnode) override
read coefficients from xml weight file
void GetHelpMessage() const override
get help message text
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
void PrintCuts(Double_t effS) const
print cuts
PDF wrapper for histograms; uses user-defined spline interpolation.
Base class for a vector of results.
Fitter using a Simulated Annealing Algorithm.
Linear interpolation of TGraph.
Timing information for training and evaluation of MVA methods.
Singleton class for Global types used by TMVA.
@ kSignal
Never change this number - it is elsewhere assumed to be zero!
Volume for BinarySearchTree.
Collectable string class.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Random number generator class based on the Mersenne Twister algorithm of M. Matsumoto and T. Nishimura.
const char * Data() const
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
void Clone(Ssiz_t nc)
Make self a distinct copy with capacity of at least tot, where tot cannot be smaller than the current...
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.