97 #include "TDirectory.h"
// Magnitude used as an "open" cut boundary. Elsewhere in this file, variables
// flagged kForceMin get their lower cut replaced by -fgMaxAbsCutVal and variables
// flagged kForceMax get their upper cut replaced by +fgMaxAbsCutVal.
124 const
Double_t TMVA::MethodCuts::fgMaxAbsCutVal = 1.0e30;
129 TMVA::MethodCuts::MethodCuts( const
TString& jobName,
131 DataSetInfo& theData,
134 MethodBase( jobName, Types::kCuts, methodTitle, theData, theOption, theTargetDir ),
135 fFitMethod ( kUseGeneticAlgorithm ),
136 fEffMethod ( kUseEventSelection ),
161 fVarHistS_smooth( 0 ),
162 fVarHistB_smooth( 0 ),
176 fFitMethod ( kUseGeneticAlgorithm ),
177 fEffMethod ( kUseEventSelection ),
202 fVarHistS_smooth( 0 ),
203 fVarHistB_smooth( 0 ),
224 fVarHistS = fVarHistB = 0;
225 fVarHistS_smooth = fVarHistB_smooth = 0;
226 fVarPdfS = fVarPdfB = 0;
228 fBinaryTreeS = fBinaryTreeB = 0;
234 fRangeSign =
new std::vector<Int_t> ( GetNvar() );
235 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fRangeSign)[ivar] = +1;
237 fMeanS =
new std::vector<Double_t>( GetNvar() );
238 fMeanB =
new std::vector<Double_t>( GetNvar() );
239 fRmsS =
new std::vector<Double_t>( GetNvar() );
240 fRmsB =
new std::vector<Double_t>( GetNvar() );
243 fFitParams =
new std::vector<EFitParameters>( GetNvar() );
244 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fFitParams)[ivar] = kNotEnforced;
246 fFitMethod = kUseMonteCarlo;
252 for (
UInt_t i=0; i<GetNvar(); i++) {
258 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
259 for (
Int_t ibin=0; ibin<fNbins; ibin++) {
260 fCutMin[ivar][ibin] = 0;
261 fCutMax[ivar][ibin] = 0;
265 fTmpCutMin =
new Double_t[GetNvar()];
266 fTmpCutMax =
new Double_t[GetNvar()];
280 delete fEffBvsSLocal;
282 if (
NULL != fCutRangeMin)
delete [] fCutRangeMin;
283 if (
NULL != fCutRangeMax)
delete [] fCutRangeMax;
284 if (
NULL != fAllVarsI)
delete [] fAllVarsI;
286 for (
UInt_t i=0;i<GetNvar();i++) {
287 if (
NULL != fCutMin[i] )
delete [] fCutMin[i];
288 if (
NULL != fCutMax[i] )
delete [] fCutMax[i];
289 if (
NULL != fCutRange[i])
delete fCutRange[i];
292 if (
NULL != fCutMin)
delete [] fCutMin;
293 if (
NULL != fCutMax)
delete [] fCutMax;
295 if (
NULL != fTmpCutMin)
delete [] fTmpCutMin;
296 if (
NULL != fTmpCutMax)
delete [] fTmpCutMax;
298 if (
NULL != fBinaryTreeS)
delete fBinaryTreeS;
299 if (
NULL != fBinaryTreeB)
delete fBinaryTreeB;
322 DeclareOptionRef(fFitMethodS =
"GA",
"FitMethod",
"Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
326 AddPreDefVal(
TString(
"MCEvents"));
327 AddPreDefVal(
TString(
"MINUIT"));
328 AddPreDefVal(
TString(
"EventScan"));
331 DeclareOptionRef(fEffMethodS =
"EffSel",
"EffMethod",
"Selection Method");
332 AddPreDefVal(
TString(
"EffSel"));
333 AddPreDefVal(
TString(
"EffPDF"));
336 fCutRange.resize(GetNvar());
337 fCutRangeMin =
new Double_t[GetNvar()];
338 fCutRangeMax =
new Double_t[GetNvar()];
339 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
341 fCutRangeMin[ivar] = fCutRangeMax[ivar] = -1;
344 DeclareOptionRef( fCutRangeMin, GetNvar(),
"CutRangeMin",
"Minimum of allowed cut range (set per variable)" );
345 DeclareOptionRef( fCutRangeMax, GetNvar(),
"CutRangeMax",
"Maximum of allowed cut range (set per variable)" );
347 fAllVarsI =
new TString[GetNvar()];
349 for (
UInt_t i=0; i<GetNvar(); i++) fAllVarsI[i] =
"NotEnforced";
351 DeclareOptionRef(fAllVarsI, GetNvar(),
"VarProp",
"Categorisation of cuts");
352 AddPreDefVal(
TString(
"NotEnforced"));
355 AddPreDefVal(
TString(
"FSmart"));
365 if (IsNormalised()) {
366 Log() <<
kWARNING <<
"Normalisation of the input variables for cut optimisation is not" <<
Endl;
367 Log() <<
kWARNING <<
"supported because this provides intransparent cut values, and no" <<
Endl;
368 Log() <<
kWARNING <<
"improvement in the performance of the algorithm." <<
Endl;
369 Log() <<
kWARNING <<
"Please remove \"Normalise\" option from booking option string" <<
Endl;
370 Log() <<
kWARNING <<
"==> Will reset normalisation flag to \"False\"" <<
Endl;
374 if (IgnoreEventsWithNegWeightsInTraining()) {
375 Log() <<
kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: "
376 << GetMethodTypeName()
377 <<
" --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string."
381 if (fFitMethodS ==
"MC" ) fFitMethod = kUseMonteCarlo;
382 else if (fFitMethodS ==
"MCEvents") fFitMethod = kUseMonteCarloEvents;
383 else if (fFitMethodS ==
"GA" ) fFitMethod = kUseGeneticAlgorithm;
384 else if (fFitMethodS ==
"SA" ) fFitMethod = kUseSimulatedAnnealing;
385 else if (fFitMethodS ==
"MINUIT" ) {
386 fFitMethod = kUseMinuit;
387 Log() <<
kWARNING <<
"poor performance of MINUIT in MethodCuts; preferred fit method: GA" <<
Endl;
389 else if (fFitMethodS ==
"EventScan" ) fFitMethod = kUseEventScan;
390 else Log() <<
kFATAL <<
"unknown minimisation method: " << fFitMethodS <<
Endl;
392 if (fEffMethodS ==
"EFFSEL" ) fEffMethod = kUseEventSelection;
393 else if (fEffMethodS ==
"EFFPDF" ) fEffMethod = kUsePDFs;
394 else fEffMethod = kUseEventSelection;
397 Log() <<
kINFO <<
Form(
"Use optimization method: \"%s\"",
398 (fFitMethod == kUseMonteCarlo) ?
"Monte Carlo" :
399 (fFitMethod == kUseMonteCarlo) ?
"Monte-Carlo-Event sampling" :
400 (fFitMethod == kUseEventScan) ?
"Full Event Scan (slow)" :
401 (fFitMethod == kUseMinuit) ?
"MINUIT" :
"Genetic Algorithm" ) <<
Endl;
402 Log() <<
kINFO <<
Form(
"Use efficiency computation method: \"%s\"",
403 (fEffMethod == kUseEventSelection) ?
"Event Selection" :
"PDF" ) <<
Endl;
406 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
407 fCutRange[ivar] =
new Interval( fCutRangeMin[ivar], fCutRangeMax[ivar] );
411 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
413 if (fAllVarsI[ivar] ==
"" || fAllVarsI[ivar] ==
"NotEnforced") theFitP = kNotEnforced;
414 else if (fAllVarsI[ivar] ==
"FMax" ) theFitP = kForceMax;
415 else if (fAllVarsI[ivar] ==
"FMin" ) theFitP = kForceMin;
416 else if (fAllVarsI[ivar] ==
"FSmart" ) theFitP = kForceSmart;
418 Log() <<
kFATAL <<
"unknown value \'" << fAllVarsI[ivar]
419 <<
"\' for fit parameter option " <<
Form(
"VarProp[%i]",ivar) <<
Endl;
421 (*fFitParams)[ivar] = theFitP;
423 if (theFitP != kNotEnforced)
424 Log() <<
kINFO <<
"Use \"" << fAllVarsI[ivar]
425 <<
"\" cuts for variable: " <<
"'" << (*fInputVars)[ivar] <<
"'" <<
Endl;
435 NoErrorCalc(err, errUpper);
438 if (fCutMin ==
NULL || fCutMax ==
NULL || fNbins == 0) {
439 Log() <<
kFATAL <<
"<Eval_Cuts> fCutMin/Max have zero pointer. "
440 <<
"Did you book Cuts ?" <<
Endl;
443 const Event* ev = GetEvent();
446 if (fTestSignalEff > 0) {
448 Int_t ibin = fEffBvsSLocal->FindBin( fTestSignalEff );
449 if (ibin < 0 ) ibin = 0;
450 else if (ibin >= fNbins) ibin = fNbins - 1;
453 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++)
454 passed &= ( (ev->
GetValue(ivar) > fCutMin[ivar][ibin]) &&
455 (ev->
GetValue(ivar) <= fCutMax[ivar][ibin]) );
457 return passed ? 1. : 0. ;
467 std::vector<Double_t> cutsMin;
468 std::vector<Double_t> cutsMax;
469 Int_t ibin = fEffBvsSLocal->FindBin( effS );
471 Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );
474 std::vector<TString>* varVec = 0;
475 if (GetTransformationHandler().GetNumOfTransformations() == 0) {
477 varVec =
new std::vector<TString>;
478 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
479 varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() );
482 else if (GetTransformationHandler().GetNumOfTransformations() == 1) {
484 varVec = GetTransformationHandler().GetTransformationStringsOfLastTransform();
488 varVec =
new std::vector<TString>;
489 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
490 varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() +
" [transformed]" );
495 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
496 if ((
UInt_t)(*varVec)[ivar].Length() > maxL) maxL = (*varVec)[ivar].Length();
498 UInt_t maxLine = 20+maxL+16;
500 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
502 Log() <<
kINFO <<
"Cut values for requested signal efficiency: " << trueEffS <<
Endl;
503 Log() <<
kINFO <<
"Corresponding background efficiency : " << fEffBvsSLocal->GetBinContent( ibin ) <<
Endl;
504 if (GetTransformationHandler().GetNumOfTransformations() == 1) {
505 Log() <<
kINFO <<
"Transformation applied to input variables : \""
506 << GetTransformationHandler().GetNameOfLastTransform() <<
"\"" <<
Endl;
508 else if (GetTransformationHandler().GetNumOfTransformations() > 1) {
509 Log() <<
kINFO <<
"[ More than one (=" << GetTransformationHandler().GetNumOfTransformations() <<
") "
510 <<
" transformations applied in transformation chain; cuts applied on transformed quantities ] " <<
Endl;
513 Log() <<
kINFO <<
"Transformation applied to input variables : None" <<
Endl;
515 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
517 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
519 <<
"Cut[" << std::setw(2) << ivar <<
"]: "
520 << std::setw(10) << cutsMin[ivar]
522 << std::setw(maxL) << (*varVec)[ivar]
524 << std::setw(10) << cutsMax[ivar] <<
Endl;
526 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
538 std::vector<Double_t> cMin( GetNvar() );
539 std::vector<Double_t> cMax( GetNvar() );
540 Double_t trueEffS = GetCuts( effS, cMin, cMax );
541 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
542 cutMin[ivar] = cMin[ivar];
543 cutMax[ivar] = cMax[ivar];
552 std::vector<Double_t>& cutMin,
553 std::vector<Double_t>& cutMax )
const
556 Int_t ibin = fEffBvsSLocal->FindBin( effS );
559 Double_t trueEffS = fEffBvsSLocal->GetBinLowEdge( ibin );
562 if (ibin < 0 ) ibin = 0;
563 else if (ibin >= fNbins) ibin = fNbins - 1;
567 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
568 cutMin.push_back( fCutMin[ivar][ibin] );
569 cutMax.push_back( fCutMax[ivar][ibin] );
580 if (fEffMethod == kUsePDFs) CreateVariablePDFs();
583 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
584 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
593 fBinaryTreeB->Fill( GetEventCollection(
Types::kTraining), fBackgroundClass );
595 for (
UInt_t ivar =0; ivar <
Data()->GetNVariables(); ivar++) {
612 if (
TMath::Abs(fCutRange[ivar]->GetMin() - fCutRange[ivar]->GetMax()) < 1.0e-300 ) {
613 fCutRange[ivar]->SetMin( xmin );
614 fCutRange[ivar]->SetMax( xmax );
616 else if (xmin > fCutRange[ivar]->GetMin()) fCutRange[ivar]->SetMin( xmin );
617 else if (xmax < fCutRange[ivar]->GetMax()) fCutRange[ivar]->SetMax( xmax );
620 std::vector<TH1F*> signalDist, bkgDist;
623 delete fEffBvsSLocal;
624 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
625 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
626 fEffBvsSLocal->SetDirectory(0);
629 for (
Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );
632 if (fFitMethod == kUseGeneticAlgorithm ||
633 fFitMethod == kUseMonteCarlo ||
634 fFitMethod == kUseMinuit ||
635 fFitMethod == kUseSimulatedAnnealing) {
638 std::vector<Interval*> ranges;
640 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
643 if (DataInfo().GetVariableInfo(ivar).GetVarType() ==
'I') {
644 nbins =
Int_t(fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin()) + 1;
647 if ((*fFitParams)[ivar] == kForceSmart) {
648 if ((*fMeanS)[ivar] > (*fMeanB)[ivar]) (*fFitParams)[ivar] = kForceMax;
649 else (*fFitParams)[ivar] = kForceMin;
652 if ((*fFitParams)[ivar] == kForceMin) {
653 ranges.push_back(
new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMin(), nbins ) );
654 ranges.push_back(
new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
656 else if ((*fFitParams)[ivar] == kForceMax) {
657 ranges.push_back(
new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
658 ranges.push_back(
new Interval( fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(),
659 fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
662 ranges.push_back(
new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
663 ranges.push_back(
new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
670 switch (fFitMethod) {
671 case kUseGeneticAlgorithm:
672 fitter =
new GeneticFitter( *
this,
Form(
"%sFitter_GA", GetName()), ranges, GetOptions() );
675 fitter =
new MCFitter ( *
this,
Form(
"%sFitter_MC", GetName()), ranges, GetOptions() );
678 fitter =
new MinuitFitter ( *
this,
Form(
"%sFitter_MINUIT", GetName()), ranges, GetOptions() );
680 case kUseSimulatedAnnealing:
684 Log() <<
kFATAL <<
"Wrong fit method: " << fFitMethod <<
Endl;
693 for (
UInt_t ivar=0; ivar<ranges.size(); ivar++)
delete ranges[ivar];
698 else if (fFitMethod == kUseEventScan) {
704 Int_t nsamples =
Int_t(0.5*nevents*(nevents - 1));
708 for (
Int_t ievt1=0; ievt1<nevents; ievt1++) {
709 for (
Int_t ievt2=ievt1+1; ievt2<nevents; ievt2++) {
711 EstimatorFunction( ievt1, ievt2 );
720 else if (fFitMethod == kUseMonteCarloEvents) {
722 Int_t nsamples = 200000;
724 DeclareOptionRef( nsamples,
"SampleSize",
"Number of Monte-Carlo-Event samples" );
725 DeclareOptionRef( seed,
"Seed",
"Seed for the random generator (0 takes random seeds)" );
737 Log() <<
kINFO <<
"Running Monte-Carlo-Event sampling over " << nsamples <<
" events" <<
Endl;
738 std::vector<Double_t> pars( 2*GetNvar() );
740 for (
Int_t itoy=0; itoy<nsamples; itoy++) {
742 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
755 const Event *ev1 = GetEvent(ievt1);
756 isSignal = DataInfo().IsSignal(ev1);
757 evt1 = ev1->GetValue( ivar );
759 const Event *ev2 = GetEvent(ievt2);
760 isSignal &= DataInfo().IsSignal(ev2);
763 if (nbreak++ > 10000)
Log() <<
kFATAL <<
"<MCEvents>: could not find signal events"
764 <<
" after 10000 trials - do you have signal events in your sample ?"
770 if (evt1 > evt2) {
Double_t z = evt1; evt1 = evt2; evt2 = z; }
772 pars[2*ivar+1] = evt2 - evt1;
776 EstimatorFunction( pars );
786 else Log() <<
kFATAL <<
"Unknown minimisation method: " << fFitMethod <<
Endl;
788 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
789 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
792 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
793 for (
Int_t ibin=0; ibin<fNbins; ibin++) {
795 if ((*fFitParams)[ivar] == kForceMin && fCutMin[ivar][ibin] > -fgMaxAbsCutVal) {
796 fCutMin[ivar][ibin] = -fgMaxAbsCutVal;
798 if ((*fFitParams)[ivar] == kForceMax && fCutMax[ivar][ibin] < fgMaxAbsCutVal) {
799 fCutMax[ivar][ibin] = fgMaxAbsCutVal;
808 for (
Double_t eff=0.1; eff<0.95; eff += 0.1) PrintCuts( eff+epsilon );
823 const Event *ev1 = GetEvent(ievt1);
824 if (!DataInfo().IsSignal(ev1))
return -1;
826 const Event *ev2 = GetEvent(ievt2);
827 if (!DataInfo().IsSignal(ev2))
return -1;
829 const Int_t nvar = GetNvar();
833 for (
Int_t ivar=0; ivar<nvar; ivar++) {
839 std::vector<Double_t> pars;
840 for (
Int_t ivar=0; ivar<nvar; ivar++) {
843 if (evt1[ivar] < evt2[ivar]) {
852 pars.push_back( cutMin );
853 pars.push_back( cutMax - cutMin );
859 return ComputeEstimator( pars );
867 return ComputeEstimator( pars );
885 this->MatchParsToCuts( pars, &fTmpCutMin[0], &fTmpCutMax[0] );
888 switch (fEffMethod) {
890 this->GetEffsfromPDFs (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
892 case kUseEventSelection:
893 this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
896 this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
907 Int_t ibinS = fEffBvsSLocal->FindBin( effS );
909 Double_t effBH = fEffBvsSLocal->GetBinContent( ibinS );
910 Double_t effBH_left = (ibinS > 1 ) ? fEffBvsSLocal->GetBinContent( ibinS-1 ) : effBH;
911 Double_t effBH_right = (ibinS < fNbins) ? fEffBvsSLocal->GetBinContent( ibinS+1 ) : effBH;
913 Double_t average = 0.5*(effBH_left + effBH_right);
914 if (effBH < effB) average = effBH;
918 eta = ( -
TMath::Abs(effBH-average) + (1.0 - (effBH - effB))) / (1.0 + effS);
925 if (effBH < 0 || effBH > effB) {
926 fEffBvsSLocal->SetBinContent( ibinS, effB );
927 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
928 fCutMin[ivar][ibinS-1] = fTmpCutMin[ivar];
929 fCutMax[ivar][ibinS-1] = fTmpCutMax[ivar];
943 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
944 diff=(fCutRange[ivar]->GetMax()-fTmpCutMax[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
946 diff=(fCutRange[ivar]->GetMin()-fTmpCutMin[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
947 penalty+=4.*diff*diff;
950 if (effS<1.e-4)
return 10.0+penalty;
951 else return 10.*(1.-10.*effS);
962 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
964 cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
965 cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
975 if (ibin < 1 || ibin > fNbins)
Log() <<
kFATAL <<
"::MatchCutsToPars: bin error: "
978 const UInt_t nvar = GetNvar();
981 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
982 cutMin[ivar] = cutMinAll[ivar][ibin-1];
983 cutMax[ivar] = cutMaxAll[ivar][ibin-1];
986 MatchCutsToPars( pars, cutMin, cutMax );
997 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
999 pars[ipar] = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
1000 pars[ipar+1] = cutMax[ivar] - cutMin[ivar];
1013 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1014 effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
1015 effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
1021 if( !fNegEffWarning )
Log() <<
kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1022 fNegEffWarning =
kTRUE;
1026 if( !fNegEffWarning )
Log() <<
kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1027 fNegEffWarning =
kTRUE;
1041 Volume* volume =
new Volume( cutMin, cutMax, GetNvar() );
1044 nSelS = fBinaryTreeS->SearchVolume( volume );
1045 nSelB = fBinaryTreeB->SearchVolume( volume );
1050 nTotS = fBinaryTreeS->GetSumOfWeights();
1051 nTotB = fBinaryTreeB->GetSumOfWeights();
1054 if (nTotS == 0 && nTotB == 0) {
1055 Log() <<
kFATAL <<
"<GetEffsfromSelection> fatal error in zero total number of events:"
1056 <<
" nTotS, nTotB: " << nTotS <<
" " << nTotB <<
" ***" <<
Endl;
1063 Log() <<
kWARNING <<
"<ComputeEstimator> zero number of signal events" <<
Endl;
1065 else if (nTotB == 0) {
1068 Log() <<
kWARNING <<
"<ComputeEstimator> zero number of background events" <<
Endl;
1078 if( !fNegEffWarning )
Log() <<
kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1079 fNegEffWarning =
kTRUE;
1083 if( !fNegEffWarning )
Log() <<
kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1084 fNegEffWarning =
kTRUE;
1094 fVarHistS =
new std::vector<TH1*>( GetNvar() );
1095 fVarHistB =
new std::vector<TH1*>( GetNvar() );
1096 fVarHistS_smooth =
new std::vector<TH1*>( GetNvar() );
1097 fVarHistB_smooth =
new std::vector<TH1*>( GetNvar() );
1098 fVarPdfS =
new std::vector<PDF*>( GetNvar() );
1099 fVarPdfB =
new std::vector<PDF*>( GetNvar() );
1106 for(
UInt_t ievt=0; ievt<
Data()->GetNEvents(); ievt++ ){
1107 const Event *ev = GetEvent(ievt);
1109 if( val > minVal ) minVal = val;
1110 if( val < maxVal ) maxVal = val;
1113 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1116 TString histTitle = (*fInputVars)[ivar] +
" signal training";
1117 TString histName = (*fInputVars)[ivar] +
"_sig";
1128 (*fVarHistS)[ivar] =
new TH1F(histName.
Data(), histTitle.
Data(), fNbins, minVal, maxVal );
1131 histTitle = (*fInputVars)[ivar] +
" background training";
1132 histName = (*fInputVars)[ivar] +
"_bgd";
1143 (*fVarHistB)[ivar] =
new TH1F(histName.
Data(), histTitle.Data(), fNbins, minVal, maxVal );
1145 for(
UInt_t ievt=0; ievt<
Data()->GetNEvents(); ievt++ ){
1146 const Event *ev = GetEvent(ievt);
1148 if( DataInfo().IsSignal(ev) ){
1149 (*fVarHistS)[ivar]->Fill( val );
1151 (*fVarHistB)[ivar]->Fill( val );
1158 (*fVarHistS_smooth)[ivar] = (
TH1F*)(*fVarHistS)[ivar]->Clone();
1159 histTitle = (*fInputVars)[ivar] +
" signal training smoothed ";
1160 histTitle += nsmooth;
1161 histTitle +=
" times";
1162 histName = (*fInputVars)[ivar] +
"_sig_smooth";
1163 (*fVarHistS_smooth)[ivar]->SetName(histName);
1164 (*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
1167 (*fVarHistS_smooth)[ivar]->Smooth(nsmooth);
1182 (*fVarHistB_smooth)[ivar] = (
TH1F*)(*fVarHistB)[ivar]->
Clone();
1183 histTitle = (*fInputVars)[ivar]+
" background training smoothed ";
1184 histTitle += nsmooth;
1185 histTitle +=
" times";
1186 histName = (*fInputVars)[ivar]+
"_bgd_smooth";
1187 (*fVarHistB_smooth)[ivar]->SetName(histName);
1188 (*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
1191 (*fVarHistB_smooth)[ivar]->Smooth(nsmooth);
1194 (*fVarPdfS)[ivar] =
new PDF(
TString(GetName()) +
" PDF Var Sig " + GetInputVar( ivar ), (*fVarHistS_smooth)[ivar],
PDF::kSpline2 );
1195 (*fVarPdfB)[ivar] =
new PDF(
TString(GetName()) +
" PDF Var Bkg " + GetInputVar( ivar ), (*fVarHistB_smooth)[ivar],
PDF::kSpline2 );
1208 istr >> dummy >>
dummy;
1210 istr >> dummy >> fNbins;
1213 istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >>
dummy ;
1216 if (dummyInt !=
Data()->GetNVariables()) {
1217 Log() <<
kFATAL <<
"<ReadWeightsFromStream> fatal error: mismatch "
1218 <<
"in number of variables: " << dummyInt <<
" != " <<
Data()->GetNVariables() <<
Endl;
1223 if (fFitMethod == kUseMonteCarlo) {
1224 Log() <<
kINFO <<
"Read cuts optimised using sample of MC events" <<
Endl;
1226 else if (fFitMethod == kUseMonteCarloEvents) {
1227 Log() <<
kINFO <<
"Read cuts optimised using sample of MC events" <<
Endl;
1229 else if (fFitMethod == kUseGeneticAlgorithm) {
1230 Log() <<
kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1232 else if (fFitMethod == kUseSimulatedAnnealing) {
1233 Log() <<
kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1235 else if (fFitMethod == kUseEventScan) {
1236 Log() <<
kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1241 Log() <<
kINFO <<
"in " << fNbins <<
" signal efficiency bins and for " << GetNvar() <<
" variables" <<
Endl;
1245 istr.getline(buffer,200);
1246 istr.getline(buffer,200);
1250 if (fEffBvsSLocal != 0)
delete fEffBvsSLocal;
1251 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
1252 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1253 fEffBvsSLocal->SetDirectory(0);
1255 for (
Int_t ibin=0; ibin<fNbins; ibin++) {
1256 istr >> tmpbin >> tmpeffS >> tmpeffB;
1257 fEffBvsSLocal->SetBinContent( ibin+1, tmpeffB );
1259 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1260 istr >> fCutMin[ivar][ibin] >> fCutMax[ivar][ibin];
1264 fEffSMin = fEffBvsSLocal->GetBinCenter(1);
1265 fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);
1275 std::vector<Double_t> cutsMin;
1276 std::vector<Double_t> cutsMax;
1282 gTools().
AddComment( wght,
Form(
"Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]", GetNvar() ) );
1292 for (
Int_t ibin=0; ibin<fNbins; ibin++) {
1293 Double_t effS = fEffBvsSLocal->GetBinCenter ( ibin + 1 );
1294 Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );
1295 if (
TMath::Abs(trueEffS) < 1e-10) trueEffS = 0;
1300 gTools().
AddAttr( binxml,
"effB", fEffBvsSLocal->GetBinContent( ibin + 1 ) );
1302 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1315 for (
UInt_t i=0; i<GetNvar(); i++) {
1316 if (fCutMin[i] != 0)
delete [] fCutMin[i];
1317 if (fCutMax[i] != 0)
delete [] fCutMax[i];
1319 if (fCutMin != 0)
delete [] fCutMin;
1320 if (fCutMax != 0)
delete [] fCutMax;
1322 Int_t tmpEffMethod, tmpFitMethod;
1323 gTools().
ReadAttr( wghtnode,
"OptimisationMethod", tmpEffMethod );
1331 if (fFitMethod == kUseMonteCarlo) {
1332 Log() <<
kINFO <<
"Read cuts optimised using sample of MC events" <<
Endl;
1334 else if (fFitMethod == kUseMonteCarloEvents) {
1335 Log() <<
kINFO <<
"Read cuts optimised using sample of MC-Event events" <<
Endl;
1337 else if (fFitMethod == kUseGeneticAlgorithm) {
1338 Log() <<
kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1340 else if (fFitMethod == kUseSimulatedAnnealing) {
1341 Log() <<
kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1343 else if (fFitMethod == kUseEventScan) {
1344 Log() <<
kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1349 Log() <<
kINFO <<
"Reading " << fNbins <<
" signal efficiency bins for " << GetNvar() <<
" variables" <<
Endl;
1351 delete fEffBvsSLocal;
1352 fEffBvsSLocal =
new TH1F( GetTestvarName() +
"_effBvsSLocal",
1353 TString(GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1354 fEffBvsSLocal->SetDirectory(0);
1355 for (
Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );
1357 fCutMin =
new Double_t*[GetNvar()];
1358 fCutMax =
new Double_t*[GetNvar()];
1359 for (
UInt_t i=0;i<GetNvar();i++) {
1379 if (tmpbin-1 >= fNbins || tmpbin-1 < 0) {
1380 Log() <<
kFATAL <<
"Mismatch in bins: " << tmpbin-1 <<
" >= " << fNbins <<
Endl;
1383 fEffBvsSLocal->SetBinContent( tmpbin, tmpeffB );
1385 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1398 Log() <<
kINFO <<
"Write monitoring histograms to file: " << BaseDir()->GetPath() <<
Endl;
1400 fEffBvsSLocal->
Write();
1403 if (fEffMethod == kUsePDFs) {
1404 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1405 (*fVarHistS)[ivar]->Write();
1406 (*fVarHistB)[ivar]->Write();
1407 (*fVarHistS_smooth)[ivar]->Write();
1408 (*fVarHistB_smooth)[ivar]->Write();
1409 (*fVarPdfS)[ivar]->GetPDFHist()->Write();
1410 (*fVarPdfB)[ivar]->GetPDFHist()->Write();
1432 Log() <<
kFATAL <<
"<GetTrainingEfficiency> wrong number of arguments"
1433 <<
" in string: " << theString
1434 <<
" | required format, e.g., Efficiency:0.05" <<
Endl;
1447 if (results->
GetHist(
"EFF_BVSS_TR")==0) {
1449 if (fBinaryTreeS != 0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
1450 if (fBinaryTreeB != 0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
1455 fBinaryTreeB->Fill( GetEventCollection(
Types::kTraining), fBackgroundClass );
1463 TH1* eff_bvss_tr =
new TH1F( GetTestvarName() +
"_trainingEffBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1465 TH1* rej_bvss_tr =
new TH1F( GetTestvarName() +
"_trainingRejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1467 results->
Store(eff_bvss_tr,
"EFF_BVSS_TR");
1468 results->
Store(rej_bvss_tr,
"REJ_BVSS_TR");
1475 Int_t nFailedBins=0;
1476 for (
Int_t bini=1; bini<=fNbins; bini++) {
1477 for (
UInt_t ivar=0; ivar <GetNvar(); ivar++){
1478 tmpCutMin[ivar] = fCutMin[ivar][bini-1];
1479 tmpCutMax[ivar] = fCutMax[ivar][bini-1];
1483 this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
1486 if (effBin != bini){
1487 Log()<<
kVERBOSE <<
"unable to fill efficiency bin " << bini<<
" " << effBin <<
Endl;
1496 if (nFailedBins>0)
Log()<<
kWARNING <<
" unable to fill "<< nFailedBins <<
" efficiency bins " <<
Endl;
1498 delete [] tmpCutMin;
1499 delete [] tmpCutMax;
1502 fSplTrainEffBvsS =
new TSpline1(
"trainEffBvsS",
new TGraph( eff_bvss_tr ) );
1506 if (
NULL == fSplTrainEffBvsS)
return 0.0;
1509 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
1510 Int_t nbins_ = 1000;
1513 for (
Int_t bini=1; bini<=nbins_; bini++) {
1515 effS = (bini - 0.5)/
Float_t(nbins_);
1516 effB = fSplTrainEffBvsS->Eval( effS );
1519 if ((effB - effBref)*(effB_ - effBref) < 0)
break;
1524 return 0.5*(effS + effS_);
1540 Data()->SetCurrentType(type);
1549 Log() <<
kFATAL <<
"<GetEfficiency> wrong number of arguments"
1550 <<
" in string: " << theString
1551 <<
" | required format, e.g., Efficiency:0.05, or empty string" <<
Endl;
1566 if (results->
GetHist(
"MVA_EFF_BvsS")==0) {
1568 if (fBinaryTreeS!=0) {
delete fBinaryTreeS; fBinaryTreeS = 0; }
1569 if (fBinaryTreeB!=0) {
delete fBinaryTreeB; fBinaryTreeB = 0; }
1575 fBinaryTreeS->Fill( GetEventCollection(
Types::kTesting), fSignalClass );
1577 fBinaryTreeB->Fill( GetEventCollection(
Types::kTesting), fBackgroundClass );
1586 TH1* eff_BvsS =
new TH1F( GetTestvarName() +
"_effBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1588 TH1* rej_BvsS =
new TH1F( GetTestvarName() +
"_rejBvsS", GetTestvarName() +
"", fNbins, 0, 1 );
1590 results->
Store(eff_BvsS,
"MVA_EFF_BvsS");
1591 results->
Store(rej_BvsS);
1596 TH1* eff_s =
new TH1F( GetTestvarName() +
"_effS", GetTestvarName() +
" (signal)", fNbins, xmin, xmax);
1598 TH1* eff_b =
new TH1F( GetTestvarName() +
"_effB", GetTestvarName() +
" (background)", fNbins, xmin, xmax);
1600 results->
Store(eff_s,
"MVA_S");
1601 results->
Store(eff_b,
"MVA_B");
1611 for (
Int_t bini=1; bini<=fNbins; bini++) {
1612 for (
UInt_t ivar=0; ivar <GetNvar(); ivar++) {
1613 tmpCutMin[ivar] = fCutMin[ivar][bini-1];
1614 tmpCutMax[ivar] = fCutMax[ivar][bini-1];
1618 this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
1619 tmpBvsS->
SetPoint(bini, effS, effB);
1624 tmpBvsS->
SetPoint(fNbins+1, 1., 1.);
1626 delete [] tmpCutMin;
1627 delete [] tmpCutMax;
1630 fSpleffBvsS =
new TSpline1(
"effBvsS", tmpBvsS );
1631 for (
Int_t bini=1; bini<=fNbins; bini++) {
1633 Double_t effB = fSpleffBvsS->Eval( effS );
1640 if (
NULL == fSpleffBvsS)
return 0.0;
1643 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
1644 Int_t nbins_ = 1000;
1650 for (
Int_t bini=1; bini<=nbins_; bini++) {
1653 effS = (bini - 0.5)/
Float_t(nbins_);
1654 effB = fSpleffBvsS->Eval( effS );
1655 integral += (1.0 - effB);
1664 for (
Int_t bini=1; bini<=nbins_; bini++) {
1666 effS = (bini - 0.5)/
Float_t(nbins_);
1667 effB = fSpleffBvsS->Eval( effS );
1670 if ((effB - effBref)*(effB_ - effBref) < 0)
break;
1675 effS = 0.5*(effS + effS_);
1677 if (
Data()->GetNEvtSigTest() > 0)
1692 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
1693 fout <<
"};" << std::endl;
1711 Log() <<
"The optimisation of rectangular cuts performed by TMVA maximises " <<
Endl;
1712 Log() <<
"the background rejection at given signal efficiency, and scans " <<
Endl;
1713 Log() <<
"over the full range of the latter quantity. Three optimisation" <<
Endl;
1714 Log() <<
"methods are optional: Monte Carlo sampling (MC), a Genetics" <<
Endl;
1715 Log() <<
"Algorithm (GA), and Simulated Annealing (SA). GA and SA are" <<
Endl;
1716 Log() <<
"expected to perform best." <<
Endl;
1718 Log() <<
"The difficulty to find the optimal cuts strongly increases with" <<
Endl;
1719 Log() <<
"the dimensionality (number of input variables) of the problem." <<
Endl;
1720 Log() <<
"This behavior is due to the non-uniqueness of the solution space."<<
Endl;
1724 Log() <<
"If the dimensionality exceeds, say, 4 input variables, it is " <<
Endl;
1725 Log() <<
"advisable to scrutinize the separation power of the variables," <<
Endl;
1726 Log() <<
"and to remove the weakest ones. If some among the input variables" <<
Endl;
1727 Log() <<
"can be described by a single cut (e.g., because signal tends to be" <<
Endl;
1728 Log() <<
"larger than background), this can be indicated to MethodCuts via" <<
Endl;
1729 Log() <<
"the \"Fsmart\" options (see option string). Choosing this option" <<
Endl;
1730 Log() <<
"reduces the number of requirements for the variable from 2 (min/max)" <<
Endl;
1731 Log() <<
"to a single one (TMVA finds out whether it is to be interpreted as" <<
Endl;
1732 Log() <<
"min or max)." <<
Endl;
1736 Log() << bold <<
"Monte Carlo sampling:" << resbold <<
Endl;
1738 Log() <<
"Apart form the \"Fsmart\" option for the variables, the only way" <<
Endl;
1739 Log() <<
"to improve the MC sampling is to increase the sampling rate. This" <<
Endl;
1740 Log() <<
"is done via the configuration option \"MC_NRandCuts\". The execution" <<
Endl;
1741 Log() <<
"time scales linearly with the sampling rate." <<
Endl;
1743 Log() << bold <<
"Genetic Algorithm:" << resbold <<
Endl;
1745 Log() <<
"The algorithm terminates if no significant fitness increase has" <<
Endl;
1746 Log() <<
"been achieved within the last \"nsteps\" steps of the calculation." <<
Endl;
1747 Log() <<
"Wiggles in the ROC curve or constant background rejection of 1" <<
Endl;
1748 Log() <<
"indicate that the GA failed to always converge at the true maximum" <<
Endl;
1749 Log() <<
"fitness. In such a case, it is recommended to broaden the search " <<
Endl;
1750 Log() <<
"by increasing the population size (\"popSize\") and to give the GA " <<
Endl;
1751 Log() <<
"more time to find improvements by increasing the number of steps" <<
Endl;
1752 Log() <<
"(\"nsteps\")" <<
Endl;
1753 Log() <<
" -> increase \"popSize\" (at least >10 * number of variables)" <<
Endl;
1754 Log() <<
" -> increase \"nsteps\"" <<
Endl;
1756 Log() << bold <<
"Simulated Annealing (SA) algorithm:" << resbold <<
Endl;
1758 Log() <<
"\"Increasing Adaptive\" approach:" <<
Endl;
1760 Log() <<
"The algorithm seeks local minima and explores their neighborhood, while" <<
Endl;
1761 Log() <<
"changing the ambient temperature depending on the number of failures" <<
Endl;
1762 Log() <<
"in the previous steps. The performance can be improved by increasing" <<
Endl;
1763 Log() <<
"the number of iteration steps (\"MaxCalls\"), or by adjusting the" <<
Endl;
1764 Log() <<
"minimal temperature (\"MinTemperature\"). Manual adjustments of the" <<
Endl;
1765 Log() <<
"speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" <<
Endl;
1766 Log() <<
"to individual data sets should also help. Summary:" << brk <<
Endl;
1767 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1768 Log() <<
" -> adjust \"MinTemperature\"" << brk <<
Endl;
1769 Log() <<
" -> adjust \"TemperatureScale\"" << brk <<
Endl;
1770 Log() <<
" -> adjust \"AdaptiveSpeed\"" <<
Endl;
1772 Log() <<
"\"Decreasing Adaptive\" approach:" <<
Endl;
1774 Log() <<
"The algorithm calculates the initial temperature (based on the effect-" <<
Endl;
1775 Log() <<
"iveness of large steps) and the multiplier that ensures to reach the" <<
Endl;
1776 Log() <<
"minimal temperature with the requested number of iteration steps." <<
Endl;
1777 Log() <<
"The performance can be improved by adjusting the minimal temperature" <<
Endl;
1778 Log() <<
" (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" << brk <<
Endl;
1779 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1780 Log() <<
" -> adjust \"MinTemperature\"" <<
Endl;
1782 Log() <<
"Other kernels:" <<
Endl;
1784 Log() <<
"Alternative ways of counting the temperature change are implemented. " <<
Endl;
1785 Log() <<
"Each of them starts with the maximum temperature (\"MaxTemperature\")" <<
Endl;
1786 Log() <<
"and decreases while changing the temperature according to a given" <<
Endl;
1787 Log() <<
"prescription:" << brk <<
Endl;
1788 Log() <<
"CurrentTemperature =" << brk <<
Endl;
1789 Log() <<
" - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" << brk <<
Endl;
1790 Log() <<
" - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" << brk <<
Endl;
1791 Log() <<
" - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" << brk <<
Endl;
1792 Log() <<
" - Sin: (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" << brk <<
Endl;
1793 Log() <<
" - Geo: CurrentTemperature * TemperatureScale" <<
Endl;
1795 Log() <<
"Their performance can be improved by adjusting initial temperature" <<
Endl;
1796 Log() <<
"(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," <<
Endl;
1797 Log() <<
"and the multiplier that scales the temperature decrease" <<
Endl;
1798 Log() <<
"(\"TemperatureScale\")" << brk <<
Endl;
1799 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1800 Log() <<
" -> adjust \"InitialTemperature\"" << brk <<
Endl;
1801 Log() <<
" -> adjust \"TemperatureScale\"" << brk <<
Endl;
1802 Log() <<
" -> adjust \"KernelTemperature\"" <<
Endl;
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
Random number generator class based on M.
MsgLogger & Endl(MsgLogger &ml)
TH1 * GetHist(const TString &alias) const
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA; there are two requirements: 1) the signal efficiency m...
void TestClassification()
nothing to test
Collectable string class.
void CheckForUnusedOptions() const
checks for unused options in option string
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
void Clone(Ssiz_t nc)
Make self a distinct copy with capacity of at least tot, where tot cannot be smaller than the current...
1-D histogram with a float per channel (see TH1 documentation)
Short_t Min(Short_t a, Short_t b)
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
const char * Data() const
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
Double_t Run()
estimator function interface for fitting
void Init(void)
default initialisation called by all constructors
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample ...
std::vector< std::vector< double > > Data
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
void Train(void)
training method: here the cuts are optimised for the training sample
std::string GetMethodName(TCppMethod_t)
MethodCuts(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="MC:150:10000:", TDirectory *theTargetFile=0)
void PrintCuts(Double_t effS) const
print cuts
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
char * Form(const char *fmt,...)
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
void DeclareOptions()
define the options (their key words) that can be set in the option string. Known options: Method
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
virtual Int_t GetSize() const
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
Describe directory structure in memory.
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
static RooMathCoreReg dummy
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
void GetHelpMessage() const
get help message text
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
virtual void SetPoint(Int_t i, Double_t x, Double_t y)
Set x and y values for point number i.
virtual ~MethodCuts(void)
destructor
Short_t Max(Short_t a, Short_t b)
A Graph is a graphics object made of two arrays X and Y with npoints each.
void ProcessOptions()
process user options; sanity check: do not allow the input variables to be normalised, because this only creates problems when interpreting the cuts
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Bool_t WriteOptionsReference() const
void Store(TObject *obj, const char *alias=0)
Double_t Sqrt(Double_t x)
Double_t GetTrainingEfficiency(const TString &)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.