#include <iostream>
#include <cstdlib>
#include "Riostream.h"
#include "TH1F.h"
#include "TObjString.h"
#include "TDirectory.h"
#include "TMath.h"
#include "TGraph.h"
#include "TSpline.h"
#include "TRandom3.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/MethodCuts.h"
#include "TMVA/GeneticFitter.h"
#include "TMVA/MinuitFitter.h"
#include "TMVA/MCFitter.h"
#include "TMVA/SimulatedAnnealingFitter.h"
#include "TMVA/PDF.h"
#include "TMVA/Tools.h"
#include "TMVA/Timer.h"
#include "TMVA/Interval.h"
#include "TMVA/TSpline1.h"
#include "TMVA/Config.h"
#include "TMVA/VariableTransformBase.h"
#include "TMVA/Results.h"
using std::atof;
REGISTER_METHOD(Cuts)
ClassImp(TMVA::MethodCuts)
// sentinel magnitude for an "effectively open" cut: forced cuts are pushed to
// +-fgMaxAbsCutVal in Train() so they never clip real data
const Double_t TMVA::MethodCuts::fgMaxAbsCutVal = 1.0e30;
////////////////////////////////////////////////////////////////////////////////
/// Standard (training) constructor. Forwards to MethodBase and resets every
/// pointer member to null and every setting to a safe default; the actual
/// configuration happens later in DeclareOptions()/ProcessOptions()/Init().
TMVA::MethodCuts::MethodCuts( const TString& jobName,
const TString& methodTitle,
DataSetInfo& theData,
const TString& theOption,
TDirectory* theTargetDir ) :
MethodBase( jobName, Types::kCuts, methodTitle, theData, theOption, theTargetDir ),
fFitMethod ( kUseGeneticAlgorithm ),
fEffMethod ( kUseEventSelection ),
fFitParams (0),
fTestSignalEff(0.7),
fEffSMin ( 0 ),
fEffSMax ( 0 ),
fCutRangeMin( 0 ),
fCutRangeMax( 0 ),
fBinaryTreeS( 0 ),
fBinaryTreeB( 0 ),
fCutMin ( 0 ),
fCutMax ( 0 ),
fTmpCutMin ( 0 ),
fTmpCutMax ( 0 ),
fAllVarsI ( 0 ),
fNpar ( 0 ),
fEffRef ( 0 ),
fRangeSign ( 0 ),
fRandom ( 0 ),
fMeanS ( 0 ),
fMeanB ( 0 ),
fRmsS ( 0 ),
fRmsB ( 0 ),
fEffBvsSLocal( 0 ),
fVarHistS ( 0 ),
fVarHistB ( 0 ),
fVarHistS_smooth( 0 ),
fVarHistB_smooth( 0 ),
fVarPdfS ( 0 ),
fVarPdfB ( 0 ),
fNegEffWarning( kFALSE )
{
}
////////////////////////////////////////////////////////////////////////////////
/// Constructor used when reading the classifier back from a weight file.
/// Same defaulting as the training constructor; the cut values themselves are
/// filled later by ReadWeightsFromStream()/ReadWeightsFromXML().
TMVA::MethodCuts::MethodCuts( DataSetInfo& theData,
const TString& theWeightFile,
TDirectory* theTargetDir ) :
MethodBase( Types::kCuts, theData, theWeightFile, theTargetDir ),
fFitMethod ( kUseGeneticAlgorithm ),
fEffMethod ( kUseEventSelection ),
fFitParams (0),
fTestSignalEff(0.7),
fEffSMin ( 0 ),
fEffSMax ( 0 ),
fCutRangeMin( 0 ),
fCutRangeMax( 0 ),
fBinaryTreeS( 0 ),
fBinaryTreeB( 0 ),
fCutMin ( 0 ),
fCutMax ( 0 ),
fTmpCutMin ( 0 ),
fTmpCutMax ( 0 ),
fAllVarsI ( 0 ),
fNpar ( 0 ),
fEffRef ( 0 ),
fRangeSign ( 0 ),
fRandom ( 0 ),
fMeanS ( 0 ),
fMeanB ( 0 ),
fRmsS ( 0 ),
fRmsB ( 0 ),
fEffBvsSLocal( 0 ),
fVarHistS ( 0 ),
fVarHistB ( 0 ),
fVarHistS_smooth( 0 ),
fVarHistB_smooth( 0 ),
fVarPdfS ( 0 ),
fVarPdfB ( 0 ),
fNegEffWarning( kFALSE )
{
}
////////////////////////////////////////////////////////////////////////////////
/// Rectangular cuts support exactly one analysis mode:
/// two-class classification.
Bool_t TMVA::MethodCuts::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses,
                                          UInt_t )
{
   if (type != Types::kClassification) return kFALSE;
   return numberClasses == 2;
}
void TMVA::MethodCuts::Init( void )
{
fVarHistS = fVarHistB = 0;
fVarHistS_smooth = fVarHistB_smooth = 0;
fVarPdfS = fVarPdfB = 0;
fFitParams = 0;
fBinaryTreeS = fBinaryTreeB = 0;
fEffSMin = 0;
fEffSMax = 0;
fNpar = 2*GetNvar();
fRangeSign = new std::vector<Int_t> ( GetNvar() );
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fRangeSign)[ivar] = +1;
fMeanS = new std::vector<Double_t>( GetNvar() );
fMeanB = new std::vector<Double_t>( GetNvar() );
fRmsS = new std::vector<Double_t>( GetNvar() );
fRmsB = new std::vector<Double_t>( GetNvar() );
fFitParams = new std::vector<EFitParameters>( GetNvar() );
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fFitParams)[ivar] = kNotEnforced;
fFitMethod = kUseMonteCarlo;
fTestSignalEff = -1;
fCutMin = new Double_t*[GetNvar()];
fCutMax = new Double_t*[GetNvar()];
for (UInt_t i=0; i<GetNvar(); i++) {
fCutMin[i] = new Double_t[fNbins];
fCutMax[i] = new Double_t[fNbins];
}
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
for (Int_t ibin=0; ibin<fNbins; ibin++) {
fCutMin[ivar][ibin] = 0;
fCutMax[ivar][ibin] = 0;
}
}
fTmpCutMin = new Double_t[GetNvar()];
fTmpCutMax = new Double_t[GetNvar()];
}
////////////////////////////////////////////////////////////////////////////////
/// Destructor: releases all heap-allocated helper structures.
/// NOTE(review): the loop below indexes fCutMin[i]/fCutMax[i]/fCutRange[i]
/// without first checking the outer pointers/vector for null/size -- this
/// assumes Init() and DeclareOptions() always ran before destruction; confirm.
TMVA::MethodCuts::~MethodCuts( void )
{
delete fRangeSign;
delete fMeanS;
delete fMeanB;
delete fRmsS;
delete fRmsB;
delete fFitParams;
delete fEffBvsSLocal;
// arrays allocated in DeclareOptions()
if (NULL != fCutRangeMin) delete [] fCutRangeMin;
if (NULL != fCutRangeMax) delete [] fCutRangeMax;
if (NULL != fAllVarsI) delete [] fAllVarsI;
// per-variable cut windows and allowed-range intervals
for (UInt_t i=0;i<GetNvar();i++) {
if (NULL != fCutMin[i] ) delete [] fCutMin[i];
if (NULL != fCutMax[i] ) delete [] fCutMax[i];
if (NULL != fCutRange[i]) delete fCutRange[i];
}
if (NULL != fCutMin) delete [] fCutMin;
if (NULL != fCutMax) delete [] fCutMax;
if (NULL != fTmpCutMin) delete [] fTmpCutMin;
if (NULL != fTmpCutMax) delete [] fTmpCutMax;
if (NULL != fBinaryTreeS) delete fBinaryTreeS;
if (NULL != fBinaryTreeB) delete fBinaryTreeB;
}
////////////////////////////////////////////////////////////////////////////////
/// Declare the booking options understood by MethodCuts:
///  - FitMethod   : minimisation engine (GA/SA/MC/MCEvents/MINUIT/EventScan)
///  - EffMethod   : efficiency computation (event counting or PDF integrals)
///  - CutRangeMin/Max[ivar] : allowed cut window per variable (-1 = automatic)
///  - VarProp[ivar]         : enforced cut direction per variable
/// Also allocates the per-variable range arrays the options are bound to.
void TMVA::MethodCuts::DeclareOptions()
{
DeclareOptionRef(fFitMethodS = "GA", "FitMethod", "Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
AddPreDefVal(TString("GA"));
AddPreDefVal(TString("SA"));
AddPreDefVal(TString("MC"));
AddPreDefVal(TString("MCEvents"));
AddPreDefVal(TString("MINUIT"));
AddPreDefVal(TString("EventScan"));
DeclareOptionRef(fEffMethodS = "EffSel", "EffMethod", "Selection Method");
AddPreDefVal(TString("EffSel"));
AddPreDefVal(TString("EffPDF"));
// -1 in both range edges means "determine the range from the data" (Train())
fCutRange.resize(GetNvar());
fCutRangeMin = new Double_t[GetNvar()];
fCutRangeMax = new Double_t[GetNvar()];
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
fCutRange[ivar] = 0;
fCutRangeMin[ivar] = fCutRangeMax[ivar] = -1;
}
DeclareOptionRef( fCutRangeMin, GetNvar(), "CutRangeMin", "Minimum of allowed cut range (set per variable)" );
DeclareOptionRef( fCutRangeMax, GetNvar(), "CutRangeMax", "Maximum of allowed cut range (set per variable)" );
fAllVarsI = new TString[GetNvar()];
for (UInt_t i=0; i<GetNvar(); i++) fAllVarsI[i] = "NotEnforced";
DeclareOptionRef(fAllVarsI, GetNvar(), "VarProp", "Categorisation of cuts");
AddPreDefVal(TString("NotEnforced"));
AddPreDefVal(TString("FMax"));
AddPreDefVal(TString("FMin"));
AddPreDefVal(TString("FSmart"));
}
////////////////////////////////////////////////////////////////////////////////
/// Process the booking options: reject unsupported settings (normalisation,
/// negative-weight suppression), translate the fit- and efficiency-method
/// strings into enums, build the per-variable cut-range intervals, and decode
/// the per-variable cut-direction ("VarProp") settings.
void TMVA::MethodCuts::ProcessOptions()
{
   // cuts are optimised on the raw variables: normalisation would make the
   // resulting cut values opaque without improving performance
   if (IsNormalised()) {
      Log() << kWARNING << "Normalisation of the input variables for cut optimisation is not" << Endl;
      Log() << kWARNING << "supported because this provides intransparent cut values, and no" << Endl;
      Log() << kWARNING << "improvement in the performance of the algorithm." << Endl;
      Log() << kWARNING << "Please remove \"Normalise\" option from booking option string" << Endl;
      Log() << kWARNING << "==> Will reset normalisation flag to \"False\"" << Endl;
      SetNormalised( kFALSE );
   }

   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
            << GetMethodTypeName()
            << " --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string."
            << Endl;
   }

   // translate the fit-method string into the corresponding enum
   if      (fFitMethodS == "MC"      ) fFitMethod = kUseMonteCarlo;
   else if (fFitMethodS == "MCEvents") fFitMethod = kUseMonteCarloEvents;
   else if (fFitMethodS == "GA"      ) fFitMethod = kUseGeneticAlgorithm;
   else if (fFitMethodS == "SA"      ) fFitMethod = kUseSimulatedAnnealing;
   else if (fFitMethodS == "MINUIT"  ) {
      fFitMethod = kUseMinuit;
      Log() << kWARNING << "poor performance of MINUIT in MethodCuts; preferred fit method: GA" << Endl;
   }
   else if (fFitMethodS == "EventScan" ) fFitMethod = kUseEventScan;
   else Log() << kFATAL << "unknown minimisation method: " << fFitMethodS << Endl;

   // efficiency computation method (defaults to event selection)
   if      (fEffMethodS == "EFFSEL" ) fEffMethod = kUseEventSelection;
   else if (fEffMethodS == "EFFPDF" ) fEffMethod = kUsePDFs;
   else                               fEffMethod = kUseEventSelection;

   // BUGFIX: the second ternary previously re-tested kUseMonteCarlo, so
   // "Monte-Carlo-Event sampling" could never be printed
   Log() << kINFO << Form("Use optimization method: \"%s\"",
                          (fFitMethod == kUseMonteCarlo)       ? "Monte Carlo" :
                          (fFitMethod == kUseMonteCarloEvents) ? "Monte-Carlo-Event sampling" :
                          (fFitMethod == kUseEventScan)        ? "Full Event Scan (slow)" :
                          (fFitMethod == kUseMinuit)           ? "MINUIT" : "Genetic Algorithm" ) << Endl;
   Log() << kINFO << Form("Use efficiency computation method: \"%s\"",
                          (fEffMethod == kUseEventSelection) ? "Event Selection" : "PDF" ) << Endl;

   // build the allowed cut window per variable from the user-supplied bounds
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      fCutRange[ivar] = new Interval( fCutRangeMin[ivar], fCutRangeMax[ivar] );
   }

   // decode per-variable cut-direction enforcement
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      EFitParameters theFitP = kNotEnforced;
      if (fAllVarsI[ivar] == "" || fAllVarsI[ivar] == "NotEnforced") theFitP = kNotEnforced;
      else if (fAllVarsI[ivar] == "FMax"   ) theFitP = kForceMax;
      else if (fAllVarsI[ivar] == "FMin"   ) theFitP = kForceMin;
      else if (fAllVarsI[ivar] == "FSmart" ) theFitP = kForceSmart;
      else {
         Log() << kFATAL << "unknown value \'" << fAllVarsI[ivar]
               << "\' for fit parameter option " << Form("VarProp[%i]",ivar) << Endl;
      }
      (*fFitParams)[ivar] = theFitP;

      if (theFitP != kNotEnforced)
         Log() << kINFO << "Use \"" << fAllVarsI[ivar]
               << "\" cuts for variable: " << "'" << (*fInputVars)[ivar] << "'" << Endl;
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Cut evaluation: returns 1.0 if the current event passes all cuts of the
/// working point selected via fTestSignalEff, otherwise 0.0 (cuts have a
/// binary "MVA" output). Aborts if the cut matrices were never filled.
Double_t TMVA::MethodCuts::GetMvaValue( Double_t* err, Double_t* errUpper )
{
// cuts provide no meaningful MVA error estimate
NoErrorCalc(err, errUpper);
if (fCutMin == NULL || fCutMax == NULL || fNbins == 0) {
Log() << kFATAL << "<Eval_Cuts> fCutMin/Max have zero pointer. "
<< "Did you book Cuts ?" << Endl;
}
const Event* ev = GetEvent();
if (fTestSignalEff > 0) {
// locate the signal-efficiency bin of the working point
// NOTE(review): FindBin returns a 1-based bin number but is used here as a
// 0-based index into the cut arrays -- confirm the intended convention
Int_t ibin = fEffBvsSLocal->FindBin( fTestSignalEff );
if (ibin < 0 ) ibin = 0;
else if (ibin >= fNbins) ibin = fNbins - 1;
// the event passes only if every variable lies inside its (min, max] window
Bool_t passed = kTRUE;
for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
passed &= ( (ev->GetValue(ivar) > fCutMin[ivar][ibin]) &&
(ev->GetValue(ivar) <= fCutMax[ivar][ibin]) );
return passed ? 1. : 0. ;
}
else return 0;
}
////////////////////////////////////////////////////////////////////////////////
/// Print the cut windows corresponding to the requested signal efficiency,
/// together with the achieved ("true") signal efficiency and the matching
/// background efficiency. Variable labels are taken from the transformation
/// chain when one transformation is active, otherwise from the dataset info.
void TMVA::MethodCuts::PrintCuts( Double_t effS ) const
{
std::vector<Double_t> cutsMin;
std::vector<Double_t> cutsMax;
Int_t ibin = fEffBvsSLocal->FindBin( effS );
Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );
// assemble the variable labels to print
std::vector<TString>* varVec = 0;
if (GetTransformationHandler().GetNumOfTransformations() == 0) {
varVec = new std::vector<TString>;
for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() );
}
}
else if (GetTransformationHandler().GetNumOfTransformations() == 1) {
// NOTE(review): this vector is deleted below -- assumes the handler
// transfers ownership of the returned vector; confirm
varVec = GetTransformationHandler().GetTransformationStringsOfLastTransform();
}
else {
varVec = new std::vector<TString>;
for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() + " [transformed]" );
}
}
// column width = longest variable label
UInt_t maxL = 0;
for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
if ((UInt_t)(*varVec)[ivar].Length() > maxL) maxL = (*varVec)[ivar].Length();
}
UInt_t maxLine = 20+maxL+16;
for (UInt_t i=0; i<maxLine; i++) Log() << "-";
Log() << Endl;
Log() << kINFO << "Cut values for requested signal efficiency: " << trueEffS << Endl;
Log() << kINFO << "Corresponding background efficiency       : " << fEffBvsSLocal->GetBinContent( ibin ) << Endl;
if (GetTransformationHandler().GetNumOfTransformations() == 1) {
Log() << kINFO << "Transformation applied to input variables : \""
<< GetTransformationHandler().GetNameOfLastTransform() << "\"" << Endl;
}
else if (GetTransformationHandler().GetNumOfTransformations() > 1) {
Log() << kINFO << "[ More than one (=" << GetTransformationHandler().GetNumOfTransformations() << ") "
<< " transformations applied in transformation chain; cuts applied on transformed quantities ] " << Endl;
}
else {
Log() << kINFO << "Transformation applied to input variables : None" << Endl;
}
for (UInt_t i=0; i<maxLine; i++) Log() << "-";
Log() << Endl;
// one line per variable: cutMin < var <= cutMax
for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
Log() << kINFO
<< "Cut[" << std::setw(2) << ivar << "]: "
<< std::setw(10) << cutsMin[ivar]
<< " < "
<< std::setw(maxL) << (*varVec)[ivar]
<< " <= "
<< std::setw(10) << cutsMax[ivar] << Endl;
}
for (UInt_t i=0; i<maxLine; i++) Log() << "-";
Log() << Endl;
delete varVec;
}
////////////////////////////////////////////////////////////////////////////////
/// Array-based convenience overload: fetches the cut windows for the requested
/// signal efficiency via the vector overload and copies them into the caller's
/// arrays (which must hold at least GetNvar() elements each).
/// Returns the actually achieved ("true") signal efficiency.
Double_t TMVA::MethodCuts::GetCuts( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const
{
   std::vector<Double_t> lower;
   std::vector<Double_t> upper;
   const Double_t trueEffS = GetCuts( effS, lower, upper );

   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      cutMin[ivar] = lower[ivar];
      cutMax[ivar] = upper[ivar];
   }
   return trueEffS;
}
////////////////////////////////////////////////////////////////////////////////
/// Fill cutMin/cutMax with the cut windows stored for the efficiency bin that
/// contains effS, and return the bin's lower edge as the achieved signal
/// efficiency. FindBin is 1-based while the cut arrays are 0-based, hence the
/// decrement and clamping below.
Double_t TMVA::MethodCuts::GetCuts( Double_t effS,
                                    std::vector<Double_t>& cutMin,
                                    std::vector<Double_t>& cutMax ) const
{
   Int_t ibin = fEffBvsSLocal->FindBin( effS );
   const Double_t trueEffS = fEffBvsSLocal->GetBinLowEdge( ibin );

   ibin--;                                  // histogram bin -> array index
   if      (ibin < 0      ) ibin = 0;
   else if (ibin >= fNbins) ibin = fNbins - 1;

   cutMin.clear();
   cutMax.clear();
   cutMin.reserve( GetNvar() );
   cutMax.reserve( GetNvar() );
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      cutMin.push_back( fCutMin[ivar][ibin] );
      cutMax.push_back( fCutMax[ivar][ibin] );
   }
   return trueEffS;
}
////////////////////////////////////////////////////////////////////////////////
/// Training: for every bin of signal efficiency, find the rectangular cut
/// windows that minimise the background efficiency. Depending on fFitMethod
/// the optimisation is driven by a fitter (GA/MC/MINUIT/SA), by a full scan
/// over all training-event pairs, or by Monte-Carlo sampling of event pairs.
/// The results are accumulated in fEffBvsSLocal and fCutMin/fCutMax by
/// ComputeEstimator(), which is invoked (indirectly) by all strategies.
void TMVA::MethodCuts::Train( void )
{
// PDF-based efficiencies need the per-variable reference PDFs first
if (fEffMethod == kUsePDFs) CreateVariablePDFs();
// (re)build binary search trees for fast signal/background event counting
if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }
fBinaryTreeS = new BinarySearchTree();
fBinaryTreeS->Fill( GetEventCollection(Types::kTraining), fSignalClass );
fBinaryTreeB = new BinarySearchTree();
fBinaryTreeB->Fill( GetEventCollection(Types::kTraining), fBackgroundClass );
// cache per-variable statistics and derive the allowed cut ranges from the
// data where the user did not provide them (min == max sentinel)
for (UInt_t ivar =0; ivar < Data()->GetNVariables(); ivar++) {
(*fMeanS)[ivar] = fBinaryTreeS->Mean(Types::kSignal, ivar);
(*fRmsS)[ivar] = fBinaryTreeS->RMS (Types::kSignal, ivar);
(*fMeanB)[ivar] = fBinaryTreeB->Mean(Types::kBackground, ivar);
(*fRmsB)[ivar] = fBinaryTreeB->RMS (Types::kBackground, ivar);
Double_t xmin = TMath::Min(fBinaryTreeS->Min(Types::kSignal, ivar),
fBinaryTreeB->Min(Types::kBackground, ivar));
Double_t xmax = TMath::Max(fBinaryTreeS->Max(Types::kSignal, ivar),
fBinaryTreeB->Max(Types::kBackground, ivar));
// widen the data range by 1% on each side
Double_t eps = 0.01*(xmax - xmin);
xmin -= eps;
xmax += eps;
if (TMath::Abs(fCutRange[ivar]->GetMin() - fCutRange[ivar]->GetMax()) < 1.0e-300 ) {
fCutRange[ivar]->SetMin( xmin );
fCutRange[ivar]->SetMax( xmax );
}
else if (xmin > fCutRange[ivar]->GetMin()) fCutRange[ivar]->SetMin( xmin );
else if (xmax < fCutRange[ivar]->GetMax()) fCutRange[ivar]->SetMax( xmax );
}
std::vector<TH1F*> signalDist, bkgDist;
// (re)create the background-vs-signal efficiency histogram; -0.1 marks bins
// not yet filled by the optimisation
delete fEffBvsSLocal;
fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
fEffBvsSLocal->SetDirectory(0);
for (Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );
if (fFitMethod == kUseGeneticAlgorithm ||
fFitMethod == kUseMonteCarlo ||
fFitMethod == kUseMinuit ||
fFitMethod == kUseSimulatedAnnealing) {
// fitter-based optimisation: two parameters (position, width) per variable
std::vector<Interval*> ranges;
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
Int_t nbins = 0;
// integer variables get one discrete step per integer value
if (DataInfo().GetVariableInfo(ivar).GetVarType() == 'I') {
nbins = Int_t(fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin()) + 1;
}
// "smart" direction: cut from the side where the signal mean lies
if ((*fFitParams)[ivar] == kForceSmart) {
if ((*fMeanS)[ivar] > (*fMeanB)[ivar]) (*fFitParams)[ivar] = kForceMax;
else (*fFitParams)[ivar] = kForceMin;
}
if ((*fFitParams)[ivar] == kForceMin) {
// pin the lower cut to the range minimum, fit only the width
ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMin(), nbins ) );
ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
}
else if ((*fFitParams)[ivar] == kForceMax) {
// pin the upper cut to the range maximum
ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
ranges.push_back( new Interval( fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(),
fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
}
else {
// both cut position and width are free
ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
}
}
FitterBase* fitter = NULL;
switch (fFitMethod) {
case kUseGeneticAlgorithm:
fitter = new GeneticFitter( *this, Form("%sFitter_GA", GetName()), ranges, GetOptions() );
break;
case kUseMonteCarlo:
fitter = new MCFitter ( *this, Form("%sFitter_MC", GetName()), ranges, GetOptions() );
break;
case kUseMinuit:
fitter = new MinuitFitter ( *this, Form("%sFitter_MINUIT", GetName()), ranges, GetOptions() );
break;
case kUseSimulatedAnnealing:
fitter = new SimulatedAnnealingFitter( *this, Form("%sFitter_SA", GetName()), ranges, GetOptions() );
break;
default:
Log() << kFATAL << "Wrong fit method: " << fFitMethod << Endl;
}
fitter->CheckForUnusedOptions();
// the fitter repeatedly calls EstimatorFunction(pars), filling the cut bins
fitter->Run();
for (UInt_t ivar=0; ivar<ranges.size(); ivar++) delete ranges[ivar];
delete fitter;
}
else if (fFitMethod == kUseEventScan) {
// exhaustive scan over all ordered event pairs
Int_t nevents = Data()->GetNEvents();
Int_t ic = 0;
Int_t nsamples = Int_t(0.5*nevents*(nevents - 1));
Timer timer( nsamples, GetName() );
Log() << kINFO << "Running full event scan: " << Endl;
for (Int_t ievt1=0; ievt1<nevents; ievt1++) {
for (Int_t ievt2=ievt1+1; ievt2<nevents; ievt2++) {
EstimatorFunction( ievt1, ievt2 );
ic++;
if ((nsamples<10000) || ic%10000 == 0) timer.DrawProgressBar( ic );
}
}
}
else if (fFitMethod == kUseMonteCarloEvents) {
// random sampling of event pairs; sample size and seed are extra options
Int_t nsamples = 200000;
UInt_t seed = 100;
DeclareOptionRef( nsamples, "SampleSize", "Number of Monte-Carlo-Event samples" );
DeclareOptionRef( seed, "Seed", "Seed for the random generator (0 takes random seeds)" );
ParseOptions();
Int_t nevents = Data()->GetNEvents();
Int_t ic = 0;
Timer timer( nsamples, GetName() );
TRandom3*rnd = new TRandom3( seed );
Log() << kINFO << "Running Monte-Carlo-Event sampling over " << nsamples << " events" << Endl;
std::vector<Double_t> pars( 2*GetNvar() );
for (Int_t itoy=0; itoy<nsamples; itoy++) {
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
// draw a pair of events intended to both be signal
Bool_t isSignal = kFALSE;
Int_t ievt1, ievt2;
Double_t evt1, evt2;
Int_t nbreak = 0;
while (!isSignal) {
ievt1 = Int_t(rnd->Uniform(0.,1.)*nevents);
ievt2 = Int_t(rnd->Uniform(0.,1.)*nevents);
const Event *ev1 = GetEvent(ievt1);
isSignal = DataInfo().IsSignal(ev1);
evt1 = ev1->GetValue( ivar );
const Event *ev2 = GetEvent(ievt2);
isSignal &= DataInfo().IsSignal(ev2);
evt2 = ev2->GetValue( ivar );
if (nbreak++ > 10000) Log() << kFATAL << "<MCEvents>: could not find signal events"
<< " after 10000 trials - do you have signal events in your sample ?"
<< Endl;
// NOTE(review): this unconditionally terminates the loop after one
// draw, bypassing the signal-only requirement above -- confirm intent
isSignal = 1;
}
// use the (sorted) pair of values as cut window for this variable
if (evt1 > evt2) { Double_t z = evt1; evt1 = evt2; evt2 = z; }
pars[2*ivar] = evt1;
pars[2*ivar+1] = evt2 - evt1;
}
EstimatorFunction( pars );
ic++;
if ((nsamples<1000) || ic%1000 == 0) timer.DrawProgressBar( ic );
}
delete rnd;
}
else Log() << kFATAL << "Unknown minimisation method: " << fFitMethod << Endl;
// the search trees are only needed during training
if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }
// open up enforced one-sided cuts to the +-fgMaxAbsCutVal sentinel
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
for (Int_t ibin=0; ibin<fNbins; ibin++) {
if ((*fFitParams)[ivar] == kForceMin && fCutMin[ivar][ibin] > -fgMaxAbsCutVal) {
fCutMin[ivar][ibin] = -fgMaxAbsCutVal;
}
if ((*fFitParams)[ivar] == kForceMax && fCutMax[ivar][ibin] < fgMaxAbsCutVal) {
fCutMax[ivar][ibin] = fgMaxAbsCutVal;
}
}
}
// print a table of cuts for efficiencies 0.1 ... 0.9 (epsilon avoids
// landing exactly on a bin edge)
Double_t epsilon = 0.0001;
for (Double_t eff=0.1; eff<0.95; eff += 0.1) PrintCuts( eff+epsilon );
}
////////////////////////////////////////////////////////////////////////////////
/// Deliberately empty override: the base-class MVA-distribution tests are not
/// meaningful for rectangular cuts, whose "MVA output" is binary (0 or 1).
void TMVA::MethodCuts::TestClassification()
{
}
////////////////////////////////////////////////////////////////////////////////
/// Estimator entry point for the event-scan strategies: interpret the values
/// of two signal events as the cut window (min/max per variable), encode the
/// window as (position, width) parameter pairs, and evaluate the estimator.
/// Returns -1 (no estimate) if either event is not a signal event.
///
/// Cleanup: the two raw new[]-allocated scratch arrays of the previous version
/// are gone; values are read directly into the parameter vector.
Double_t TMVA::MethodCuts::EstimatorFunction( Int_t ievt1, Int_t ievt2 )
{
   const Event *ev1 = GetEvent(ievt1);
   if (!DataInfo().IsSignal(ev1)) return -1;

   const Event *ev2 = GetEvent(ievt2);
   if (!DataInfo().IsSignal(ev2)) return -1;

   const Int_t nvar = GetNvar();
   std::vector<Double_t> pars;
   pars.reserve( 2*nvar );
   for (Int_t ivar=0; ivar<nvar; ivar++) {
      const Double_t v1 = ev1->GetValue( ivar );
      const Double_t v2 = ev2->GetValue( ivar );
      // parameters are (lower cut, window width) per variable
      const Double_t cutMin = (v1 < v2) ? v1 : v2;
      const Double_t cutMax = (v1 < v2) ? v2 : v1;
      pars.push_back( cutMin );
      pars.push_back( cutMax - cutMin );
   }

   return ComputeEstimator( pars );
}
////////////////////////////////////////////////////////////////////////////////
/// Fitter interface: evaluate the estimator for a parameter vector containing
/// (cut position, window width) pairs, one pair per input variable.
Double_t TMVA::MethodCuts::EstimatorFunction( std::vector<Double_t>& pars )
{
return ComputeEstimator( pars );
}
////////////////////////////////////////////////////////////////////////////////
/// Core estimator: decode the parameter vector into cut windows, compute the
/// resulting signal/background efficiencies, and return a figure of merit that
/// the fitter minimises/maximises. As a side effect, whenever the candidate
/// improves the background efficiency recorded for its signal-efficiency bin,
/// the bin content of fEffBvsSLocal and the cut windows fCutMin/fCutMax for
/// that bin are updated.
Double_t TMVA::MethodCuts::ComputeEstimator( std::vector<Double_t>& pars )
{
Double_t effS = 0, effB = 0;
this->MatchParsToCuts( pars, &fTmpCutMin[0], &fTmpCutMax[0] );
// efficiency computation according to the configured method
switch (fEffMethod) {
case kUsePDFs:
this->GetEffsfromPDFs (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
break;
case kUseEventSelection:
this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
break;
default:
this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
}
Double_t eta = 0;
// histogram bin of this candidate's signal efficiency (FindBin is 1-based)
Int_t ibinS = fEffBvsSLocal->FindBin( effS );
Double_t effBH = fEffBvsSLocal->GetBinContent( ibinS );
Double_t effBH_left = (ibinS > 1 ) ? fEffBvsSLocal->GetBinContent( ibinS-1 ) : effBH;
Double_t effBH_right = (ibinS < fNbins) ? fEffBvsSLocal->GetBinContent( ibinS+1 ) : effBH;
// smoothness term: compare with the average of the neighbouring bins
Double_t average = 0.5*(effBH_left + effBH_right);
if (effBH < effB) average = effBH;
// figure of merit: rewards lower background efficiency and smooth curves
eta = ( -TMath::Abs(effBH-average) + (1.0 - (effBH - effB))) / (1.0 + effS);
// new best value for this bin (effBH < 0 marks a still-unfilled bin):
// record the background efficiency and the cut windows (0-based: ibinS-1)
if (effBH < 0 || effBH > effB) {
fEffBvsSLocal->SetBinContent( ibinS, effB );
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
fCutMin[ivar][ibinS-1] = fTmpCutMin[ivar];
fCutMax[ivar][ibinS-1] = fTmpCutMax[ivar];
}
}
// degenerate candidates in the first bin (near-zero signal efficiency):
// return a penalty that pushes the fitter towards wider, better-placed cuts
if (ibinS<=1) {
Double_t penalty=0.,diff=0.;
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
diff=(fCutRange[ivar]->GetMax()-fTmpCutMax[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
penalty+=diff*diff;
diff=(fCutRange[ivar]->GetMin()-fTmpCutMin[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
penalty+=4.*diff*diff;
}
if (effS<1.e-4) return 10.0+penalty;
else return 10.*(1.-10.*effS);
}
return eta;
}
void TMVA::MethodCuts::MatchParsToCuts( const std::vector<Double_t> & pars,
Double_t* cutMin, Double_t* cutMax )
{
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
Int_t ipar = 2*ivar;
cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
}
}
void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& pars,
Double_t** cutMinAll, Double_t** cutMaxAll, Int_t ibin )
{
if (ibin < 1 || ibin > fNbins) Log() << kFATAL << "::MatchCutsToPars: bin error: "
<< ibin << Endl;
const UInt_t nvar = GetNvar();
Double_t *cutMin = new Double_t[nvar];
Double_t *cutMax = new Double_t[nvar];
for (UInt_t ivar=0; ivar<nvar; ivar++) {
cutMin[ivar] = cutMinAll[ivar][ibin-1];
cutMax[ivar] = cutMaxAll[ivar][ibin-1];
}
MatchCutsToPars( pars, cutMin, cutMax );
delete [] cutMin;
delete [] cutMax;
}
void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& pars,
Double_t* cutMin, Double_t* cutMax )
{
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
Int_t ipar = 2*ivar;
pars[ipar] = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
pars[ipar+1] = cutMax[ivar] - cutMin[ivar];
}
}
////////////////////////////////////////////////////////////////////////////////
/// Compute signal and background efficiencies as products of per-variable
/// PDF integrals over the cut windows. Negative products (possible when the
/// sample contains negative event weights) are clamped to zero with a
/// once-only warning.
void TMVA::MethodCuts::GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
                                        Double_t& effS, Double_t& effB )
{
   effS = 1.0;
   effB = 1.0;
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
      effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
   }

   // clamp negative efficiencies to zero, warning only on the first occurrence
   auto clampToZero = [this]( Double_t& eff, const char* msg ) {
      if (eff < 0.0) {
         eff = 0.0;
         if( !fNegEffWarning ) Log() << kWARNING << msg << Endl;
         fNegEffWarning = kTRUE;
      }
   };
   clampToZero( effS, "Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." );
   clampToZero( effB, "Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." );
}
////////////////////////////////////////////////////////////////////////////////
/// Compute signal and background efficiencies by counting (weighted) training
/// events inside the cut volume via the binary search trees. Negative
/// efficiencies (possible with negative event weights) are clamped to zero
/// with a once-only warning.
/// NOTE(review): the "both totals zero" branch logs kFATAL and then falls
/// through -- this relies on kFATAL aborting the job; confirm, else the
/// divisions below would divide by zero.
void TMVA::MethodCuts::GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
Double_t& effS, Double_t& effB)
{
Float_t nTotS = 0, nTotB = 0;
Float_t nSelS = 0, nSelB = 0;
// count selected events inside the rectangular cut volume
Volume* volume = new Volume( cutMin, cutMax, GetNvar() );
nSelS = fBinaryTreeS->SearchVolume( volume );
nSelB = fBinaryTreeB->SearchVolume( volume );
delete volume;
// total (weighted) numbers of signal and background events
nTotS = fBinaryTreeS->GetSumOfWeights();
nTotB = fBinaryTreeB->GetSumOfWeights();
if (nTotS == 0 && nTotB == 0) {
Log() << kFATAL << "<GetEffsfromSelection> fatal error in zero total number of events:"
<< " nTotS, nTotB: " << nTotS << " " << nTotB << " ***" << Endl;
}
// guard each ratio against a zero denominator
if (nTotS == 0 ) {
effS = 0;
effB = nSelB/nTotB;
Log() << kWARNING << "<ComputeEstimator> zero number of signal events" << Endl;
}
else if (nTotB == 0) {
effB = 0;
effS = nSelS/nTotS;
Log() << kWARNING << "<ComputeEstimator> zero number of background events" << Endl;
}
else {
effS = nSelS/nTotS;
effB = nSelB/nTotB;
}
// clamp negative efficiencies; warn only once per job
if( effS < 0.0 ) {
effS = 0.0;
if( !fNegEffWarning ) Log() << kWARNING << "Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
fNegEffWarning = kTRUE;
}
if( effB < 0.0 ) {
effB = 0.0;
if( !fNegEffWarning ) Log() << kWARNING << "Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
fNegEffWarning = kTRUE;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Build reference histograms and smoothed spline-2 PDFs for signal and
/// background, one per input variable; used when fEffMethod == kUsePDFs.
///
/// Fixes w.r.t. the previous version:
///  - the min/max scan read ev->GetValue(ievt) (event index used as variable
///    index) and had the comparisons inverted, so minVal/maxVal never left
///    their DBL_MAX/-DBL_MAX start values; the histogram-fill loop had the
///    same wrong index;
///  - the range is now computed per variable (inside the ivar loop) instead
///    of once for all variables.
void TMVA::MethodCuts::CreateVariablePDFs( void )
{
   fVarHistS        = new std::vector<TH1*>( GetNvar() );
   fVarHistB        = new std::vector<TH1*>( GetNvar() );
   fVarHistS_smooth = new std::vector<TH1*>( GetNvar() );
   fVarHistB_smooth = new std::vector<TH1*>( GetNvar() );
   fVarPdfS         = new std::vector<PDF*>( GetNvar() );
   fVarPdfB         = new std::vector<PDF*>( GetNvar() );

   Int_t nsmooth = 0;

   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {

      // histogram range: full span of this variable over the training sample
      Double_t minVal =  DBL_MAX;
      Double_t maxVal = -DBL_MAX;
      for( UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++ ){
         const Event *ev = GetEvent(ievt);
         Float_t val = ev->GetValue(ivar);
         if( val < minVal ) minVal = val;
         if( val > maxVal ) maxVal = val;
      }

      // raw signal/background distributions
      TString histTitle = (*fInputVars)[ivar] + " signal training";
      TString histName  = (*fInputVars)[ivar] + "_sig";
      (*fVarHistS)[ivar] = new TH1F(histName.Data(), histTitle.Data(), fNbins, minVal, maxVal );

      histTitle = (*fInputVars)[ivar] + " background training";
      histName  = (*fInputVars)[ivar] + "_bgd";
      (*fVarHistB)[ivar] = new TH1F(histName.Data(), histTitle.Data(), fNbins, minVal, maxVal );

      for( UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++ ){
         const Event *ev = GetEvent(ievt);
         Float_t val = ev->GetValue(ivar);
         if( DataInfo().IsSignal(ev) ){
            (*fVarHistS)[ivar]->Fill( val );
         }else{
            (*fVarHistB)[ivar]->Fill( val );
         }
      }

      // smoothed copies (nsmooth == 0: histograms kept as-is)
      (*fVarHistS_smooth)[ivar] = (TH1F*)(*fVarHistS)[ivar]->Clone();
      histTitle  = (*fInputVars)[ivar] + " signal training smoothed ";
      histTitle += nsmooth;
      histTitle += " times";
      histName   = (*fInputVars)[ivar] + "_sig_smooth";
      (*fVarHistS_smooth)[ivar]->SetName(histName);
      (*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
      (*fVarHistS_smooth)[ivar]->Smooth(nsmooth);

      (*fVarHistB_smooth)[ivar] = (TH1F*)(*fVarHistB)[ivar]->Clone();
      histTitle  = (*fInputVars)[ivar] + " background training smoothed ";
      histTitle += nsmooth;
      histTitle += " times";
      histName   = (*fInputVars)[ivar] + "_bgd_smooth";
      (*fVarHistB_smooth)[ivar]->SetName(histName);
      (*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
      (*fVarHistB_smooth)[ivar]->Smooth(nsmooth);

      // PDFs from the smoothed histograms
      (*fVarPdfS)[ivar] = new PDF( TString(GetName()) + " PDF Var Sig " + GetInputVar( ivar ), (*fVarHistS_smooth)[ivar], PDF::kSpline2 );
      (*fVarPdfB)[ivar] = new PDF( TString(GetName()) + " PDF Var Bkg " + GetInputVar( ivar ), (*fVarHistB_smooth)[ivar], PDF::kSpline2 );
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Read the cut windows from a legacy text-format weight stream: a header
/// (number of bins and variables), then one line per efficiency bin with
/// (ibin, effS, effB) followed by the cutMin/cutMax pairs per variable.
/// The parsing is positional and must match the writer's format exactly.
void TMVA::MethodCuts::ReadWeightsFromStream( std::istream& istr )
{
TString dummy;
UInt_t dummyInt;
// skip labels; pick up the number of efficiency bins
istr >> dummy >> dummy;
istr >> dummy >> fNbins;
// positional parse of the header line containing the variable count
istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >> dummy ;
if (dummyInt != Data()->GetNVariables()) {
Log() << kFATAL << "<ReadWeightsFromStream> fatal error: mismatch "
<< "in number of variables: " << dummyInt << " != " << Data()->GetNVariables() << Endl;
}
// informational: report which optimisation produced these cuts
if (fFitMethod == kUseMonteCarlo) {
Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
}
else if (fFitMethod == kUseMonteCarloEvents) {
Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
}
else if (fFitMethod == kUseGeneticAlgorithm) {
Log() << kINFO << "Read cuts optimised using Genetic Algorithm" << Endl;
}
else if (fFitMethod == kUseSimulatedAnnealing) {
Log() << kINFO << "Read cuts optimised using Simulated Annealing algorithm" << Endl;
}
else if (fFitMethod == kUseEventScan) {
Log() << kINFO << "Read cuts optimised using Full Event Scan" << Endl;
}
else {
Log() << kWARNING << "unknown method: " << fFitMethod << Endl;
}
Log() << kINFO << "in " << fNbins << " signal efficiency bins and for " << GetNvar() << " variables" << Endl;
// discard the remainder of the current line and one separator line
char buffer[200];
istr.getline(buffer,200);
istr.getline(buffer,200);
Int_t tmpbin;
Float_t tmpeffS, tmpeffB;
// rebuild the efficiency histogram before filling it from the stream
if (fEffBvsSLocal != 0) delete fEffBvsSLocal;
fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
fEffBvsSLocal->SetDirectory(0);
for (Int_t ibin=0; ibin<fNbins; ibin++) {
istr >> tmpbin >> tmpeffS >> tmpeffB;
fEffBvsSLocal->SetBinContent( ibin+1, tmpeffB );
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
istr >> fCutMin[ivar][ibin] >> fCutMax[ivar][ibin];
}
}
fEffSMin = fEffBvsSLocal->GetBinCenter(1);
fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);
}
////////////////////////////////////////////////////////////////////////////////
/// Write the optimised cuts to XML: one <Bin> node per signal-efficiency bin
/// carrying the achieved signal/background efficiencies, with a nested <Cuts>
/// node holding cutMin_i/cutMax_i attributes per variable.
void TMVA::MethodCuts::AddWeightsXMLTo( void* parent ) const
{
std::vector<Double_t> cutsMin;
std::vector<Double_t> cutsMax;
void* wght = gTools().AddChild(parent, "Weights");
gTools().AddAttr( wght, "OptimisationMethod", (Int_t)fEffMethod);
gTools().AddAttr( wght, "FitMethod", (Int_t)fFitMethod );
gTools().AddAttr( wght, "nbins", fNbins );
gTools().AddComment( wght, Form( "Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]", GetNvar() ) );
for (Int_t ibin=0; ibin<fNbins; ibin++) {
// query the cuts at the bin centre; GetCuts returns the achieved effS
Double_t effS = fEffBvsSLocal->GetBinCenter ( ibin + 1 );
Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );
// avoid writing numerical noise for an exactly-zero efficiency
if (TMath::Abs(trueEffS) < 1e-10) trueEffS = 0;
void* binxml = gTools().AddChild( wght, "Bin" );
gTools().AddAttr( binxml, "ibin", ibin+1 );
gTools().AddAttr( binxml, "effS", trueEffS );
gTools().AddAttr( binxml, "effB", fEffBvsSLocal->GetBinContent( ibin + 1 ) );
void* cutsxml = gTools().AddChild( binxml, "Cuts" );
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
gTools().AddAttr( cutsxml, Form( "cutMin_%i", ivar ), cutsMin[ivar] );
gTools().AddAttr( cutsxml, Form( "cutMax_%i", ivar ), cutsMax[ivar] );
}
}
}
////////////////////////////////////////////////////////////////////////////////
/// Read the cut windows back from XML (counterpart of AddWeightsXMLTo):
/// re-allocate the cut matrices for the stored number of bins, then fill the
/// efficiency histogram and per-bin cut windows from the <Bin>/<Cuts> nodes.
void TMVA::MethodCuts::ReadWeightsFromXML( void* wghtnode )
{
// drop cut matrices sized for the old fNbins before reading the new one
for (UInt_t i=0; i<GetNvar(); i++) {
if (fCutMin[i] != 0) delete [] fCutMin[i];
if (fCutMax[i] != 0) delete [] fCutMax[i];
}
if (fCutMin != 0) delete [] fCutMin;
if (fCutMax != 0) delete [] fCutMax;
Int_t tmpEffMethod, tmpFitMethod;
gTools().ReadAttr( wghtnode, "OptimisationMethod", tmpEffMethod );
gTools().ReadAttr( wghtnode, "FitMethod", tmpFitMethod );
gTools().ReadAttr( wghtnode, "nbins", fNbins );
fEffMethod = (EEffMethod)tmpEffMethod;
fFitMethod = (EFitMethodType)tmpFitMethod;
// informational: report which optimisation produced these cuts
if (fFitMethod == kUseMonteCarlo) {
Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
}
else if (fFitMethod == kUseMonteCarloEvents) {
Log() << kINFO << "Read cuts optimised using sample of MC-Event events" << Endl;
}
else if (fFitMethod == kUseGeneticAlgorithm) {
Log() << kINFO << "Read cuts optimised using Genetic Algorithm" << Endl;
}
else if (fFitMethod == kUseSimulatedAnnealing) {
Log() << kINFO << "Read cuts optimised using Simulated Annealing algorithm" << Endl;
}
else if (fFitMethod == kUseEventScan) {
Log() << kINFO << "Read cuts optimised using Full Event Scan" << Endl;
}
else {
Log() << kWARNING << "unknown method: " << fFitMethod << Endl;
}
Log() << kINFO << "Reading " << fNbins << " signal efficiency bins for " << GetNvar() << " variables" << Endl;
// rebuild the efficiency histogram; -0.1 marks bins absent from the file
delete fEffBvsSLocal;
fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
fEffBvsSLocal->SetDirectory(0);
for (Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );
// fresh cut matrices for the new bin count
fCutMin = new Double_t*[GetNvar()];
fCutMax = new Double_t*[GetNvar()];
for (UInt_t i=0;i<GetNvar();i++) {
fCutMin[i] = new Double_t[fNbins];
fCutMax[i] = new Double_t[fNbins];
}
Int_t tmpbin;
Float_t tmpeffS, tmpeffB;
void* ch = gTools().GetChild(wghtnode,"Bin");
while (ch) {
gTools().ReadAttr( ch, "ibin", tmpbin );
gTools().ReadAttr( ch, "effS", tmpeffS );
gTools().ReadAttr( ch, "effB", tmpeffB );
// validate the 1-based bin number before using it as an array index
if (tmpbin-1 >= fNbins || tmpbin-1 < 0) {
Log() << kFATAL << "Mismatch in bins: " << tmpbin-1 << " >= " << fNbins << Endl;
}
fEffBvsSLocal->SetBinContent( tmpbin, tmpeffB );
void* ct = gTools().GetChild(ch);
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
gTools().ReadAttr( ct, Form( "cutMin_%i", ivar ), fCutMin[ivar][tmpbin-1] );
gTools().ReadAttr( ct, Form( "cutMax_%i", ivar ), fCutMax[ivar][tmpbin-1] );
}
ch = gTools().GetNextChild(ch, "Bin");
}
}
void TMVA::MethodCuts::WriteMonitoringHistosToFile( void ) const
{
Log() << kINFO << "Write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
fEffBvsSLocal->Write();
if (fEffMethod == kUsePDFs) {
for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
(*fVarHistS)[ivar]->Write();
(*fVarHistB)[ivar]->Write();
(*fVarHistS_smooth)[ivar]->Write();
(*fVarHistB_smooth)[ivar]->Write();
(*fVarPdfS)[ivar]->GetPDFHist()->Write();
(*fVarPdfB)[ivar]->GetPDFHist()->Write();
}
}
}
Double_t TMVA::MethodCuts::GetTrainingEfficiency(const TString& theString)
{
   // Return the signal efficiency on the TRAINING sample at the background
   // efficiency requested in "theString" (format: "Efficiency:0.05").
   // On the first call the background-efficiency-vs-signal-efficiency curve
   // is computed from the stored cuts and cached as spline fSplTrainEffBvsS.

   // parse the input string into its tokens
   TList* list = gTools().ParseFormatLine( theString );

   // sanity check of the argument count
   if (list->GetSize() != 2) {
      delete list; // fix: release the token list on the error path (consistent with GetEfficiency)
      Log() << kFATAL << "<GetTrainingEfficiency> wrong number of arguments"
            << " in string: " << theString
            << " | required format, e.g., Efficiency:0.05" << Endl;
      return -1;
   }

   Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, GetAnalysisType());

   // the reference background efficiency at which the signal efficiency is evaluated
   Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
   delete list;

   // first call: fill the training-efficiency histograms and build the spline
   if (results->GetHist("EFF_BVSS_TR")==0) {

      // rebuild binary search trees on the training events for fast
      // signal/background efficiency evaluation
      if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
      if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

      fBinaryTreeS = new BinarySearchTree();
      fBinaryTreeS->Fill( GetEventCollection(Types::kTraining), fSignalClass );
      fBinaryTreeB = new BinarySearchTree();
      fBinaryTreeB->Fill( GetEventCollection(Types::kTraining), fBackgroundClass );

      // efficiency and rejection histograms; eff bins are pre-set to -0.1 to
      // mark "not filled", rejection bins start at 0
      TH1* eff_bvss_tr = new TH1F( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_bvss_tr->SetBinContent( ibin, -0.1 );
      TH1* rej_bvss_tr = new TH1F( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) rej_bvss_tr->SetBinContent( ibin, 0. );
      results->Store(eff_bvss_tr, "EFF_BVSS_TR");
      results->Store(rej_bvss_tr, "REJ_BVSS_TR");

      // scratch arrays for the cut window of one signal-efficiency bin
      Double_t* tmpCutMin = new Double_t[GetNvar()];
      Double_t* tmpCutMax = new Double_t[GetNvar()];

      Int_t nFailedBins=0;

      for (Int_t bini=1; bini<=fNbins; bini++) {
         for (UInt_t ivar=0; ivar <GetNvar(); ivar++){
            tmpCutMin[ivar] = fCutMin[ivar][bini-1];
            tmpCutMax[ivar] = fCutMax[ivar][bini-1];
         }
         // evaluate the stored cuts on the training sample
         Double_t effS, effB;
         this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
         // the measured signal efficiency must fall back into the bin the
         // cuts were optimised for; otherwise the bin is left unfilled
         Int_t effBin = eff_bvss_tr->GetXaxis()->FindBin(effS);
         if (effBin != bini){
            Log()<< kVERBOSE << "unable to fill efficiency bin " << bini<< " " << effBin <<Endl;
            nFailedBins++;
         }
         else{
            eff_bvss_tr->SetBinContent( bini, effB );
            rej_bvss_tr->SetBinContent( bini, 1.0-effB );
         }
      }
      if (nFailedBins>0) Log()<< kWARNING << " unable to fill "<< nFailedBins <<" efficiency bins " <<Endl;

      delete [] tmpCutMin;
      delete [] tmpCutMax;

      // cache the curve as a spline
      // NOTE(review): the TGraph is handed to TSpline1 — presumably owned and
      // deleted by it; confirm against TMVA::TSpline1 before changing.
      fSplTrainEffBvsS = new TSpline1( "trainEffBvsS", new TGraph( eff_bvss_tr ) );
   }

   if (NULL == fSplTrainEffBvsS) return 0.0;

   // scan the spline and locate the crossing with the reference background
   // efficiency; (effS_, effB_) trail one step behind (effS, effB)
   Double_t effS, effB, effS_ = 0, effB_ = 0;
   Int_t nbins_ = 1000;
   for (Int_t bini=1; bini<=nbins_; bini++) {
      effS = (bini - 0.5)/Float_t(nbins_);
      effB = fSplTrainEffBvsS->Eval( effS );

      // sign change of (effB - effBref) brackets the crossing point
      if ((effB - effBref)*(effB_ - effBref) < 0) break;
      effS_ = effS;
      effB_ = effB;
   }

   // linear interpolation between the two bracketing scan points
   return 0.5*(effS + effS_);
}
Double_t TMVA::MethodCuts::GetEfficiency( const TString& theString, Types::ETreeType type, Double_t& effSerr )
{
   // Return either the area above the background-vs-signal efficiency curve
   // (theString empty / single token) or the signal efficiency at the
   // background efficiency given as "Efficiency:0.05", evaluated on the
   // event collection selected by "type". The binomial error on the signal
   // efficiency is returned through effSerr. On the first call the curve is
   // computed from the stored cuts and cached as spline fSpleffBvsS.
   Data()->SetCurrentType(type);

   Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, GetAnalysisType() );

   // parse the input string
   TList* list = gTools().ParseFormatLine( theString, ":" );

   if (list->GetSize() > 2) {
      delete list;
      Log() << kFATAL << "<GetEfficiency> wrong number of arguments"
            << " in string: " << theString
            << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
      return -1;
   }

   // fewer than two tokens: compute the area above the curve instead of a
   // single working point (effBref = 1 so the scan below would cover everything)
   Bool_t computeArea = (list->GetSize() < 2);
   Float_t effBref = (computeArea?1.:atof( ((TObjString*)list->At(1))->GetString() ));
   delete list;

   // first call: fill the efficiency histograms and build the spline
   if (results->GetHist("MVA_EFF_BvsS")==0) {

      // rebuild binary search trees for fast efficiency evaluation
      if (fBinaryTreeS!=0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
      if (fBinaryTreeB!=0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

      fBinaryTreeS = new BinarySearchTree();
      fBinaryTreeS->Fill( GetEventCollection(Types::kTesting), fSignalClass );
      fBinaryTreeB = new BinarySearchTree();
      fBinaryTreeB->Fill( GetEventCollection(Types::kTesting), fBackgroundClass );

      // efficiency/rejection monitoring histograms; -0.1 marks unfilled bins
      TH1* eff_BvsS = new TH1F( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_BvsS->SetBinContent( ibin, -0.1 );
      TH1* rej_BvsS = new TH1F( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) rej_BvsS->SetBinContent( ibin, 0.0 );
      results->Store(eff_BvsS, "MVA_EFF_BvsS");
      results->Store(rej_BvsS);

      // xmax slightly above 1 so that effS == 1 falls into the last bin
      Double_t xmin = 0.;
      Double_t xmax = 1.000001;

      TH1* eff_s = new TH1F( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbins, xmin, xmax);
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_s->SetBinContent( ibin, -0.1 );
      TH1* eff_b = new TH1F( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbins, xmin, xmax);
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_b->SetBinContent( ibin, -0.1 );
      results->Store(eff_s, "MVA_S");
      results->Store(eff_b, "MVA_B");

      // scratch arrays for one bin's cut window
      Double_t* tmpCutMin = new Double_t[GetNvar()];
      Double_t* tmpCutMax = new Double_t[GetNvar()];

      // fix: fNbins+2 points are set below (indices 0..fNbins+1, including the
      // (0,0) and (1,1) anchors); the original fNbins+1 forced SetPoint to
      // silently grow the array
      TGraph* tmpBvsS = new TGraph(fNbins+2);
      tmpBvsS->SetPoint(0, 0., 0.);

      for (Int_t bini=1; bini<=fNbins; bini++) {
         for (UInt_t ivar=0; ivar <GetNvar(); ivar++) {
            tmpCutMin[ivar] = fCutMin[ivar][bini-1];
            tmpCutMax[ivar] = fCutMax[ivar][bini-1];
         }
         // evaluate the stored cuts
         Double_t effS, effB;
         this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
         tmpBvsS->SetPoint(bini, effS, effB);

         eff_s->SetBinContent(bini, effS);
         eff_b->SetBinContent(bini, effB);
      }
      tmpBvsS->SetPoint(fNbins+1, 1., 1.);

      delete [] tmpCutMin;
      delete [] tmpCutMax;

      // cache the curve as a spline
      // NOTE(review): tmpBvsS is handed to TSpline1 — presumably owned and
      // deleted by it; confirm against TMVA::TSpline1 before changing.
      fSpleffBvsS = new TSpline1( "effBvsS", tmpBvsS );

      // fill the spline-smoothed efficiency/rejection histograms
      for (Int_t bini=1; bini<=fNbins; bini++) {
         Double_t effS = (bini - 0.5)/Float_t(fNbins);
         Double_t effB = fSpleffBvsS->Eval( effS );
         eff_BvsS->SetBinContent( bini, effB );
         rej_BvsS->SetBinContent( bini, 1.0-effB );
      }
   }

   if (NULL == fSpleffBvsS) return 0.0;

   Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
   Int_t nbins_ = 1000;

   if (computeArea) {
      // area above the curve: average background rejection over effS in (0,1)
      Double_t integral = 0;
      for (Int_t bini=1; bini<=nbins_; bini++) {
         effS = (bini - 0.5)/Float_t(nbins_);
         effB = fSpleffBvsS->Eval( effS );
         integral += (1.0 - effB);
      }
      integral /= nbins_;
      return integral;
   }
   else {
      // scan for the crossing with the reference background efficiency
      for (Int_t bini=1; bini<=nbins_; bini++) {
         effS = (bini - 0.5)/Float_t(nbins_);
         effB = fSpleffBvsS->Eval( effS );

         // sign change brackets the crossing point
         if ((effB - effBref)*(effB_ - effBref) < 0) break;
         effS_ = effS;
         effB_ = effB;
      }

      // interpolate, and attach the binomial error from the test-sample size
      effS = 0.5*(effS + effS_);
      effSerr = 0;
      if (Data()->GetNEvtSigTest() > 0)
         effSerr = TMath::Sqrt( effS*(1.0 - effS)/Double_t(Data()->GetNEvtSigTest()) );
      return effS;
   }

   return -1; // not reached: both branches above return
}
void TMVA::MethodCuts::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   // Emit the class-specific part of the standalone C++ response class:
   // cut-based selection is not exported, so only a stub body and the
   // closing brace are written.
   fout << " // not implemented for class: \"" << className << "\"" << std::endl
        << "};" << std::endl;
}
void TMVA::MethodCuts::GetHelpMessage() const
{
   // Print a help message for this classifier to the logger: short method
   // description, performance guidance, and tuning hints for the MC, GA and
   // SA optimisers. Fixes typos in the user-visible text ("Apart form" ->
   // "Apart from", "Genetics" -> "Genetic", "descreases" -> "decreases",
   // "termperature descrease" -> "temperature decrease").
   TString bold    = gConfig().WriteOptionsReference() ? "<b>" : "";
   TString resbold = gConfig().WriteOptionsReference() ? "</b>" : "";
   TString brk     = gConfig().WriteOptionsReference() ? "<br>" : "";

   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The optimisation of rectangular cuts performed by TMVA maximises " << Endl;
   Log() << "the background rejection at given signal efficiency, and scans " << Endl;
   Log() << "over the full range of the latter quantity. Three optimisation" << Endl;
   Log() << "methods are optional: Monte Carlo sampling (MC), a Genetic" << Endl;
   Log() << "Algorithm (GA), and Simulated Annealing (SA). GA and SA are" << Endl;
   Log() << "expected to perform best." << Endl;
   Log() << Endl;
   Log() << "The difficulty to find the optimal cuts strongly increases with" << Endl;
   Log() << "the dimensionality (number of input variables) of the problem." << Endl;
   Log() << "This behavior is due to the non-uniqueness of the solution space."<< Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "If the dimensionality exceeds, say, 4 input variables, it is " << Endl;
   Log() << "advisable to scrutinize the separation power of the variables," << Endl;
   Log() << "and to remove the weakest ones. If some among the input variables" << Endl;
   Log() << "can be described by a single cut (e.g., because signal tends to be" << Endl;
   Log() << "larger than background), this can be indicated to MethodCuts via" << Endl;
   Log() << "the \"Fsmart\" options (see option string). Choosing this option" << Endl;
   Log() << "reduces the number of requirements for the variable from 2 (min/max)" << Endl;
   Log() << "to a single one (TMVA finds out whether it is to be interpreted as" << Endl;
   Log() << "min or max)." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << "" << Endl;
   Log() << bold << "Monte Carlo sampling:" << resbold << Endl;
   Log() << "" << Endl;
   Log() << "Apart from the \"Fsmart\" option for the variables, the only way" << Endl;
   Log() << "to improve the MC sampling is to increase the sampling rate. This" << Endl;
   Log() << "is done via the configuration option \"MC_NRandCuts\". The execution" << Endl;
   Log() << "time scales linearly with the sampling rate." << Endl;
   Log() << "" << Endl;
   Log() << bold << "Genetic Algorithm:" << resbold << Endl;
   Log() << "" << Endl;
   Log() << "The algorithm terminates if no significant fitness increase has" << Endl;
   Log() << "been achieved within the last \"nsteps\" steps of the calculation." << Endl;
   Log() << "Wiggles in the ROC curve or constant background rejection of 1" << Endl;
   Log() << "indicate that the GA failed to always converge at the true maximum" << Endl;
   Log() << "fitness. In such a case, it is recommended to broaden the search " << Endl;
   Log() << "by increasing the population size (\"popSize\") and to give the GA " << Endl;
   Log() << "more time to find improvements by increasing the number of steps" << Endl;
   Log() << "(\"nsteps\")" << Endl;
   Log() << " -> increase \"popSize\" (at least >10 * number of variables)" << Endl;
   Log() << " -> increase \"nsteps\"" << Endl;
   Log() << "" << Endl;
   Log() << bold << "Simulated Annealing (SA) algorithm:" << resbold << Endl;
   Log() << "" << Endl;
   Log() << "\"Increasing Adaptive\" approach:" << Endl;
   Log() << "" << Endl;
   Log() << "The algorithm seeks local minima and explores their neighborhood, while" << Endl;
   Log() << "changing the ambient temperature depending on the number of failures" << Endl;
   Log() << "in the previous steps. The performance can be improved by increasing" << Endl;
   Log() << "the number of iteration steps (\"MaxCalls\"), or by adjusting the" << Endl;
   Log() << "minimal temperature (\"MinTemperature\"). Manual adjustments of the" << Endl;
   Log() << "speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" << Endl;
   Log() << "to individual data sets should also help. Summary:" << brk << Endl;
   Log() << " -> increase \"MaxCalls\"" << brk << Endl;
   Log() << " -> adjust \"MinTemperature\"" << brk << Endl;
   Log() << " -> adjust \"TemperatureScale\"" << brk << Endl;
   Log() << " -> adjust \"AdaptiveSpeed\"" << Endl;
   Log() << "" << Endl;
   Log() << "\"Decreasing Adaptive\" approach:" << Endl;
   Log() << "" << Endl;
   Log() << "The algorithm calculates the initial temperature (based on the effect-" << Endl;
   Log() << "iveness of large steps) and the multiplier that ensures to reach the" << Endl;
   Log() << "minimal temperature with the requested number of iteration steps." << Endl;
   Log() << "The performance can be improved by adjusting the minimal temperature" << Endl;
   Log() << " (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" << brk << Endl;
   Log() << " -> increase \"MaxCalls\"" << brk << Endl;
   Log() << " -> adjust \"MinTemperature\"" << Endl;
   Log() << " " << Endl;
   Log() << "Other kernels:" << Endl;
   Log() << "" << Endl;
   Log() << "Alternative ways of counting the temperature change are implemented. " << Endl;
   Log() << "Each of them starts with the maximum temperature (\"MaxTemperature\")" << Endl;
   Log() << "and decreases while changing the temperature according to a given" << Endl;
   Log() << "prescription:" << brk << Endl;
   Log() << "CurrentTemperature =" << brk << Endl;
   Log() << " - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" << brk << Endl;
   Log() << " - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" << brk << Endl;
   Log() << " - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" << brk << Endl;
   Log() << " - Sin: (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" << brk << Endl;
   Log() << " - Geo: CurrentTemperature * TemperatureScale" << Endl;
   Log() << "" << Endl;
   Log() << "Their performance can be improved by adjusting initial temperature" << Endl;
   Log() << "(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," << Endl;
   Log() << "and the multiplier that scales the temperature decrease" << Endl;
   Log() << "(\"TemperatureScale\")" << brk << Endl;
   Log() << " -> increase \"MaxCalls\"" << brk << Endl;
   Log() << " -> adjust \"InitialTemperature\"" << brk << Endl;
   Log() << " -> adjust \"TemperatureScale\"" << brk << Endl;
   Log() << " -> adjust \"KernelTemperature\"" << Endl;
}