#include <iostream>
#include <stdio.h>
#include "time.h"
#include "Riostream.h"
#include "TH1F.h"
#include "TObjString.h"
#include "TDirectory.h"
#include "TMath.h"
#include "TGraph.h"
#include "TSpline.h"
#include "TMVA/MethodCuts.h"
#include "TMVA/GeneticFitter.h"
#include "TMVA/MinuitFitter.h"
#include "TMVA/MCFitter.h"
#include "TMVA/Tools.h"
#include "TMVA/Timer.h"
#include "TMVA/Interval.h"
#include "TMVA/TSpline1.h"
ClassImp(TMVA::MethodCuts)

//_______________________________________________________________________
// Standard constructor: used when booking the method for training.
// Initialises the data members, then declares, parses and processes the
// user's booking option string.
TMVA::MethodCuts::MethodCuts( const TString& jobName, const TString& methodTitle, DataSet& theData,
                              const TString& theOption, TDirectory* theTargetDir )
   : MethodBase( jobName, methodTitle, theData, theOption, theTargetDir )
{
   // default-initialise all members
   InitCuts();

   // option handling: declare known options, parse the booking string,
   // then act on the parsed values
   DeclareOptions();
   ParseOptions();
   ProcessOptions();
}
//_______________________________________________________________________
// Constructor used when reading the method back from a weight file
// (application phase). Options are declared but not processed here —
// the cut values come from the weight file via ReadWeightsFromStream().
TMVA::MethodCuts::MethodCuts( DataSet& theData,
                              const TString& theWeightFile,
                              TDirectory* theTargetDir )
   : MethodBase( theData, theWeightFile, theTargetDir )
{
   InitCuts();
   DeclareOptions();
}
void TMVA::MethodCuts::InitCuts( void )
{
SetMethodName( "Cuts" );
SetMethodType( Types::kCuts );
SetTestvarName();
fVarHistS = fVarHistB = 0;
fVarHistS_smooth = fVarHistB_smooth = 0;
fVarPdfS = fVarPdfB = 0;
fFitParams = 0;
fEffBvsSLocal = 0;
fBinaryTreeS = fBinaryTreeB = 0;
fEffSMin = 0;
fEffSMax = 0;
fTrainEffBvsS = 0;
fTrainRejBvsS = 0;
fNpar = 2*GetNvar();
fRangeSign = new vector<Int_t> ( GetNvar() );
for (Int_t ivar=0; ivar<GetNvar(); ivar++) (*fRangeSign)[ivar] = +1;
fMeanS = new vector<Double_t>( GetNvar() );
fMeanB = new vector<Double_t>( GetNvar() );
fRmsS = new vector<Double_t>( GetNvar() );
fRmsB = new vector<Double_t>( GetNvar() );
fFitParams = new vector<EFitParameters>( GetNvar() );
for (Int_t ivar=0; ivar<GetNvar(); ivar++) (*fFitParams)[ivar] = kNotEnforced;
fFitMethod = kUseMonteCarlo;
fTestSignalEff = -1;
fCutMin = new Double_t*[GetNvar()];
fCutMax = new Double_t*[GetNvar()];
for (Int_t i=0;i<GetNvar();i++) {
fCutMin[i] = new Double_t[fNbins];
fCutMax[i] = new Double_t[fNbins];
}
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
for (Int_t ibin=0; ibin<fNbins; ibin++) {
fCutMin[ivar][ibin] = 0;
fCutMax[ivar][ibin] = 0;
}
}
fTmpCutMin = new Double_t[GetNvar()];
fTmpCutMax = new Double_t[GetNvar()];
}
//_______________________________________________________________________
// Destructor: releases all heap memory owned by this method.
TMVA::MethodCuts::~MethodCuts( void )
{
   delete fRangeSign;
   delete fMeanS;
   delete fMeanB;
   delete fRmsS;
   delete fRmsB;
   delete fFitParams;   // fix: allocated in InitCuts(), was leaked
   for (Int_t i=0;i<GetNvar();i++) {
      if (fCutMin[i] != NULL) delete [] fCutMin[i];
      if (fCutMax[i] != NULL) delete [] fCutMax[i];
      if (fCutRange[i] != NULL) delete fCutRange[i];
   }
   delete[] fCutMin;
   delete[] fCutMax;
   delete[] fTmpCutMin;
   delete[] fTmpCutMax;
   // fix: arrays allocated in DeclareOptions() (called by every
   // constructor) were previously leaked
   delete[] fCutRangeMin;
   delete[] fCutRangeMax;
   delete[] fAllVarsI;
   if (NULL != fBinaryTreeS) delete fBinaryTreeS;
   if (NULL != fBinaryTreeB) delete fBinaryTreeB;
}
//_______________________________________________________________________
// Define the options handled by this method.
// NOTE: AddPreDefVal() attaches an allowed value to the most recently
// declared option, so the statement order below is significant.
void TMVA::MethodCuts::DeclareOptions()
{
   // fit (minimisation) method used to optimise the cuts
   DeclareOptionRef(fFitMethodS = "GA", "FitMethod", "Minimization Method");
   AddPreDefVal(TString("GA"));
   AddPreDefVal(TString("SA"));
   AddPreDefVal(TString("MC"));
   AddPreDefVal(TString("MINUIT"));

   // method used to estimate signal/background efficiencies
   DeclareOptionRef(fEffMethodS = "EffSel", "EffMethod", "Selection Method");
   AddPreDefVal(TString("EffSel"));
   AddPreDefVal(TString("EffPDF"));

   // per-variable allowed cut ranges; min == max (-1/-1 default) means
   // "determine the range from the training data" (see Train())
   fCutRange.resize(GetNvar());
   fCutRangeMin = new Double_t[GetNvar()];
   fCutRangeMax = new Double_t[GetNvar()];
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      fCutRange[ivar] = 0;
      fCutRangeMin[ivar] = fCutRangeMax[ivar] = -1;
   }
   DeclareOptionRef( fCutRangeMin, GetNvar(), "CutRangeMin", "Minimum of allowed cut range (set per variable)" );
   DeclareOptionRef( fCutRangeMax, GetNvar(), "CutRangeMax", "Maximum of allowed cut range (set per variable)" );

   // per-variable cut-direction hints, interpreted in ProcessOptions()
   fAllVarsI = new TString[GetNvar()];
   for (int i=0; i<GetNvar(); i++) fAllVarsI[i] = "NotEnforced";
   DeclareOptionRef(fAllVarsI, GetNvar(), "VarProp", "Categorisation of cuts");
   AddPreDefVal(TString("NotEnforced"));
   AddPreDefVal(TString("FMax"));
   AddPreDefVal(TString("FMin"));
   AddPreDefVal(TString("FSmart"));
   AddPreDefVal(TString("FVerySmart"));
}
void TMVA::MethodCuts::ProcessOptions()
{
MethodBase::ProcessOptions();
if (IsNormalised()) {
fLogger << kWARNING << "Normalisation of the input variables for cut optimisation is not" << Endl;
fLogger << kWARNING << "supported because this provides intransparent cut values, and no" << Endl;
fLogger << kWARNING << "improvement in the performance of the algorithm." << Endl;
fLogger << kWARNING << "Please remove \"Normalise\" option from booking option string" << Endl;
fLogger << kWARNING << "==> Will reset normalisation flag to \"False\"" << Endl;
SetNormalised( kFALSE );
}
if (fFitMethodS == "MC" ) fFitMethod = kUseMonteCarlo;
else if (fFitMethodS == "GA" ) fFitMethod = kUseGeneticAlgorithm;
else if (fFitMethodS == "SA" ) fFitMethod = kUseSimulatedAnnealing;
else if (fFitMethodS == "MINUIT" ) {
fFitMethod = kUseMinuit;
fLogger << kWARNING << "poor performance of MINUIT in MethodCuts; preferred fit method: GA" << Endl;
}
else {
fLogger << kFATAL << "unknown minimization method: " << fFitMethodS << Endl;
}
if (fEffMethodS == "EFFSEL" ) fEffMethod = kUseEventSelection;
else if (fEffMethodS == "EFFPDF" ) fEffMethod = kUsePDFs;
else fEffMethod = kUseEventSelection;
fLogger << kINFO << Form("Use optimization method: '%s'\n",
(fFitMethod == kUseMonteCarlo) ? "Monte Carlo" : "Genetic Algorithm" );
fLogger << kINFO << Form("Use efficiency computation method: '%s'\n",
(fEffMethod == kUseEventSelection) ? "Event Selection" : "PDF" );
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
fCutRange[ivar] = new Interval( fCutRangeMin[ivar], fCutRangeMax[ivar] );
}
int maxVar = GetNvar();
for (Int_t ivar=0; ivar<maxVar; ivar++) {
EFitParameters theFitP = kNotEnforced;
if (fAllVarsI[ivar] == "" || fAllVarsI[ivar] == "NotEnforced") theFitP = kNotEnforced;
else if (fAllVarsI[ivar] == "FMax" ) theFitP = kForceMax;
else if (fAllVarsI[ivar] == "FMin" ) theFitP = kForceMin;
else if (fAllVarsI[ivar] == "FSmart" ) theFitP = kForceSmart;
else if (fAllVarsI[ivar] == "FVerySmart" ) theFitP = kForceVerySmart;
else {
fLogger << kFATAL << "unknown value \'" << fAllVarsI[ivar]
<< "\' for fit parameter option " << Form("VarProp[%i]",ivar+1) << Endl;
}
(*fFitParams)[ivar] = theFitP;
if (theFitP != kNotEnforced)
fLogger << kINFO << "Use \"" << fAllVarsI[ivar]
<< "\" cuts for variable: " << "'" << (*fInputVars)[ivar] << "'" << Endl;
}
if (fFitMethod == kUseMonteCarlo) {
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
TString theFitOption = ( ((*fFitParams)[ivar] == kNotEnforced) ? "NotEnforced" :
((*fFitParams)[ivar] == kForceMin ) ? "ForceMin" :
((*fFitParams)[ivar] == kForceMax ) ? "ForceMax" :
((*fFitParams)[ivar] == kForceSmart ) ? "ForceSmart" :
((*fFitParams)[ivar] == kForceVerySmart ) ? "ForceVerySmart" : "other" );
fLogger << kINFO << Form("Option for variable: %s: '%s' (#: %i)\n",
(const char*)(*fInputVars)[ivar], (const char*)theFitOption,
(Int_t)(*fFitParams)[ivar] );
}
}
if (GetVariableTransform() == Types::kDecorrelated)
fLogger << kINFO << "Use decorrelated variable set" << Endl;
else if (GetVariableTransform() == Types::kPCA)
fLogger << kINFO << "Use principal component transformation" << Endl;
}
//_______________________________________________________________________
// Cut evaluation: returns 1.0 if the current event passes the cut set
// associated with the requested test signal efficiency, 0.0 otherwise.
Double_t TMVA::MethodCuts::GetMvaValue()
{
   if (fCutMin == NULL || fCutMax == NULL || fNbins == 0) {
      fLogger << kFATAL << "<Eval_Cuts> fCutMin/Max have zero pointer. "
              << "Did you book Cuts ?" << Endl;
   }

   // without a requested test signal efficiency there is no cut set to apply
   if (fTestSignalEff <= 0) return 0;

   // map the requested signal efficiency onto a bin index, clamped to range
   Int_t ibin = Int_t((fTestSignalEff - fEffSMin)/(fEffSMax - fEffSMin)*Double_t(fNbins));
   if      (ibin < 0      ) ibin = 0;
   else if (ibin >= fNbins) ibin = fNbins - 1;

   // event passes if every variable lies inside its (min, max] window
   Bool_t accepted = kTRUE;
   for (Int_t iv=0; iv<GetNvar(); iv++) {
      const Double_t value = GetEventVal(iv);
      accepted &= (value > fCutMin[iv][ibin] && value <= fCutMax[iv][ibin]);
   }
   return accepted ? 1. : 0.;
}
//_______________________________________________________________________
// Print the optimised cut windows for the requested signal efficiency,
// together with the corresponding background efficiency.
void TMVA::MethodCuts::PrintCuts( Double_t effS ) const
{
   std::vector<Double_t> cutsMin;
   std::vector<Double_t> cutsMax;

   // map effS onto a histogram bin, clamped to the valid range
   // (fix: previously unclamped, so an out-of-range effS addressed an
   // invalid bin of fEffBvsSLocal — clamp with the same convention as GetCuts)
   Int_t ibin = Int_t((effS - fEffSMin)/(fEffSMax - fEffSMin)*Double_t(fNbins));
   if      (ibin < 0      ) ibin = 0;
   else if (ibin >= fNbins) ibin = fNbins - 1;

   GetCuts( effS, cutsMin, cutsMax );

   // variable expressions after an eventual transformation
   std::vector<TString>* varVec = GetVarTransform().GetTransformationStrings();
   if (!varVec) fLogger << kFATAL << "Big troubles in \"PrintCuts\": zero varVec pointer" << Endl;

   // longest variable expression, for an aligned printout
   UInt_t maxL = 0;
   for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
      if ((UInt_t)(*varVec)[ivar].Length() > maxL) maxL = (*varVec)[ivar].Length();
   }
   UInt_t maxLine = 20+maxL+16;

   for (UInt_t i=0; i<maxLine; i++) fLogger << "-";
   fLogger << Endl;
   fLogger << kINFO << "Cut values for requested signal efficiency: " << effS << Endl;
   // (fix: removed a stray closing parenthesis with no opening counterpart)
   fLogger << kINFO << "Corresponding background efficiency : " << fEffBvsSLocal->GetBinContent( ibin +1 )
           << Endl;
   if (GetVariableTransform() != Types::kNone) {
      // fix: "TRANFORMED" -> "TRANSFORMED"
      fLogger << kINFO << "NOTE: The cuts are applied to TRANSFORMED variables (see explicit transformation below)" << Endl;
      fLogger << kINFO << " Name of the transformation: \"" << GetVarTransform().GetName() << "\"" << Endl;
   }
   for (UInt_t i=0; i<maxLine; i++) fLogger << "-";
   fLogger << Endl;
   for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
      fLogger << kINFO
              << "Cut[" << setw(2) << ivar << "]: "
              << setw(10) << cutsMin[ivar]
              << " < "
              << setw(maxL) << (*varVec)[ivar]
              << " <= "
              << setw(10) << cutsMax[ivar] << Endl;
   }
   for (UInt_t i=0; i<maxLine; i++) fLogger << "-";
   fLogger << Endl;

   delete varVec; // the transformation-string vector is owned by the caller
}
//_______________________________________________________________________
// Retrieve the cut windows stored for the signal-efficiency bin closest
// to effS; the bin index is clamped to the valid range.
void TMVA::MethodCuts::GetCuts( Double_t effS,
                                std::vector<Double_t>& cutMin,
                                std::vector<Double_t>& cutMax ) const
{
   Int_t bin = Int_t((effS - fEffSMin)/(fEffSMax - fEffSMin)*Double_t(fNbins));
   bin = (bin < 0) ? 0 : ((bin >= fNbins) ? fNbins - 1 : bin);

   cutMin.clear();
   cutMax.clear();
   cutMin.reserve( GetNvar() );
   cutMax.reserve( GetNvar() );
   for (Int_t iv=0; iv<GetNvar(); iv++) {
      cutMin.push_back( fCutMin[iv][bin] );
      cutMax.push_back( fCutMax[iv][bin] );
   }
}
//_______________________________________________________________________
// Training: optimise the cut windows for every signal-efficiency bin
// using the configured fitter (GA, MC or MINUIT), which calls back into
// EstimatorFunction() and thereby fills fEffBvsSLocal and fCutMin/fCutMax.
void TMVA::MethodCuts::Train( void )
{
   if (!SanityChecks()) fLogger << kFATAL << "Basic sanity checks failed" << Endl;

   // the PDF-based efficiency estimate needs per-variable PDFs
   if (fEffMethod == kUsePDFs) CreateVariablePDFs();

   // (re)build binary search trees for fast event counting inside cut boxes
   if (fBinaryTreeS != 0) delete fBinaryTreeS;
   if (fBinaryTreeB != 0) delete fBinaryTreeB;
   fBinaryTreeS = new BinarySearchTree();
   fBinaryTreeS->Fill( *this, Data().GetTrainingTree(), 1 );
   fBinaryTreeB = new BinarySearchTree();
   fBinaryTreeB->Fill( *this, Data().GetTrainingTree(), 0 );

   // cache per-variable statistics and determine the allowed cut ranges
   for (UInt_t ivar =0; ivar < Data().GetNVariables(); ivar++) {
      (*fMeanS)[ivar] = fBinaryTreeS->Mean(Types::kSignal, ivar);
      (*fRmsS)[ivar] = fBinaryTreeS->RMS (Types::kSignal, ivar);
      (*fMeanB)[ivar] = fBinaryTreeB->Mean(Types::kBackground, ivar);
      (*fRmsB)[ivar] = fBinaryTreeB->RMS (Types::kBackground, ivar);
      // overall range spanned by signal and background
      Double_t xmin = TMath::Min(fBinaryTreeS->Min(Types::kSignal, ivar), fBinaryTreeB->Min(Types::kBackground, ivar));
      Double_t xmax = TMath::Max(fBinaryTreeS->Max(Types::kSignal, ivar), fBinaryTreeB->Max(Types::kBackground, ivar));
      // a user range with min == max means: take the range from the data;
      // otherwise clip the user range to the data range
      if (fCutRange[ivar]->GetMin() == fCutRange[ivar]->GetMax()) {
         fCutRange[ivar]->SetMin( xmin );
         fCutRange[ivar]->SetMax( xmax );
      }
      else if (xmin > fCutRange[ivar]->GetMin()) fCutRange[ivar]->SetMin( xmin );
      else if (xmax < fCutRange[ivar]->GetMax()) fCutRange[ivar]->SetMax( xmax );
   }

   vector<TH1F*> signalDist, bkgDist;

   // book the local background-vs-signal efficiency histogram; bins are
   // initialised to -0.1 to mark them as "not yet filled"
   Data().ResetCurrentTree();
   fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
                             TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
   for (Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );

   if (fFitMethod == kUseGeneticAlgorithm || fFitMethod == kUseMonteCarlo || fFitMethod == kUseMinuit) {
      // build the fit-parameter intervals: two per variable — the window
      // base position and the window width (see MatchParsToCuts)
      vector<Interval*> ranges;
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         Int_t nbins = 0;
         // integer variables get a discrete interval with one bin per value
         if (Data().GetVarType(ivar) == 'I') {
            nbins = Int_t(fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin()) + 1;
         }
         EFitParameters fitParam = (*fFitParams)[ivar];
         // "smart" direction: cut away the side opposite to the signal mean
         if (fitParam == kForceSmart) {
            if ((*fMeanS)[ivar] > (*fMeanB)[ivar]) fitParam = kForceMax;
            else fitParam = kForceMin;
         }
         if (fitParam == kForceMin){
            // lower edge pinned to the range minimum; only the width floats
            ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMin(), nbins ) );
            ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
         }
         else if (fitParam == kForceMax){
            // upper edge pinned to the range maximum: the base floats while
            // the width is fixed to the full range
            ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
            ranges.push_back( new Interval( fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(),
                                            fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
         }
         else{
            // both edges float freely inside the allowed range
            ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
            ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
         }
      }

      // instantiate the requested fitter; it calls back into EstimatorFunction()
      FitterBase* fitter = NULL;
      switch (fFitMethod) {
      case kUseGeneticAlgorithm:
         fitter = new GeneticFitter( *this, Form("%sFitter_GA", GetName()), ranges, GetOptions() );
         break;
      case kUseMonteCarlo:
         fitter = new MCFitter ( *this, Form("%sFitter_MC", GetName()), ranges, GetOptions() );
         break;
      case kUseMinuit:
         fitter = new MinuitFitter ( *this, Form("%sFitter_MINUIT", GetName()), ranges, GetOptions() );
         break;
      default:
         fLogger << kFATAL << "Wrong fit method: " << fFitMethod << Endl;
      }

      fitter->CheckForUnusedOptions();
      fitter->Run();

      for (UInt_t ivar=0; ivar<ranges.size(); ivar++) delete ranges[ivar];
   }
   else fLogger << kFATAL << "unknown minization method: " << fFitMethod << Endl;

   // the search trees are only needed during the fit
   if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
   if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

   // valid signal-efficiency range covered by the local histogram
   fEffSMin = fEffBvsSLocal->GetBinCenter(1);
   fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);

   // print an example cut set (at 50% signal efficiency)
   PrintCuts( 0.5 );
}
//_______________________________________________________________________
// Testing is a no-op for the cuts method: the background-efficiency
// versus signal-efficiency information is produced elsewhere
// (see GetEfficiency / GetTrainingEfficiency).
void TMVA::MethodCuts::Test( TTree* )
{
}
//_______________________________________________________________________
// Fitter callback interface: forwards the current parameter vector to the
// estimator computation and returns the resulting fitness value.
Double_t TMVA::MethodCuts::EstimatorFunction( std::vector<Double_t>& par )
{
   return ComputeEstimator( par );
}
//_______________________________________________________________________
// Compute the "cut fitness" estimator for the fitter, and — as a side
// effect — store the tentative cuts in the signal-efficiency bin they
// belong to whenever they improve on the best background efficiency
// found so far for that bin.
Double_t TMVA::MethodCuts::ComputeEstimator( std::vector<Double_t>& par )
{
   Double_t effS = 0, effB = 0;

   // translate fit parameters into tentative cut windows
   this->MatchParsToCuts( par, &fTmpCutMin[0], &fTmpCutMax[0] );

   // compute signal and background efficiencies for these cuts
   switch (fEffMethod) {
   case kUsePDFs:
      this->GetEffsfromPDFs( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB );
      break;
   case kUseEventSelection:
      this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
      break;
   default:
      // fall back to event counting
      this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
   }

   Double_t eta = 0;

   // signal-efficiency bin this cut set belongs to (clamped to 1..fNbins)
   Int_t ibinS = (Int_t)(effS*Float_t(fNbins) + 1);
   if (ibinS < 1 ) ibinS = 1;
   if (ibinS > fNbins) ibinS = fNbins;

   // best background efficiency found so far in this bin and its
   // neighbours; bins start at -0.1, i.e. "not yet filled" (at the edges
   // the neighbour access reads the under/overflow bin)
   Double_t effBH = fEffBvsSLocal->GetBinContent( ibinS );
   Double_t effBH_left = fEffBvsSLocal->GetBinContent( ibinS-1 );
   Double_t effBH_right = fEffBvsSLocal->GetBinContent( ibinS+1 );
   Double_t average = (effBH_left+effBH_right)/2.;
   if (effBH < effB) average = effBH;

   // fitness: rewards an improvement over the stored background efficiency
   // and smoothness with respect to the neighbouring bins
   eta = ( -TMath::Abs(effBH-average) +( 1. - (effBH - effB) ) ) / (1+effS);

   // store the cuts if this is the best (lowest) background efficiency
   // seen for this bin so far (effBH < 0 marks an unfilled bin)
   if (effBH < 0 || effBH > effB) {
      fEffBvsSLocal->SetBinContent( ibinS, effB );
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         fCutMin[ivar][ibinS-1] = fTmpCutMin[ivar];
         fCutMax[ivar][ibinS-1] = fTmpCutMax[ivar];
      }
   }

   return eta;
}
void TMVA::MethodCuts::MatchParsToCuts( const std::vector<Double_t> & par,
Double_t* cutMin, Double_t* cutMax )
{
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
Int_t ipar = 2*ivar;
cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? par[ipar] : par[ipar] - par[ipar+1];
cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? par[ipar] + par[ipar+1] : par[ipar];
}
}
void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& par,
Double_t** cutMinAll, Double_t** cutMaxAll, Int_t ibin )
{
if (ibin < 1 || ibin > fNbins) fLogger << kFATAL << "::MatchCutsToPars: bin error: "
<< ibin << Endl;
const Int_t nvar = GetNvar();
Double_t *cutMin = new Double_t[nvar];
Double_t *cutMax = new Double_t[nvar];
for (Int_t ivar=0; ivar<nvar; ivar++) {
cutMin[ivar] = cutMinAll[ivar][ibin-1];
cutMax[ivar] = cutMaxAll[ivar][ibin-1];
}
MatchCutsToPars( par, cutMin, cutMax );
delete [] cutMin;
delete [] cutMax;
}
//_______________________________________________________________________
// Inverse of MatchParsToCuts: encode each (min, max) cut window as a
// (base value, width) fit-parameter pair, the base being the edge
// selected by the per-variable range sign.
void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& par,
                                        Double_t* cutMin, Double_t* cutMax )
{
   for (Int_t iv=0; iv<GetNvar(); iv++) {
      const Int_t  ip            = 2*iv;
      const Bool_t lowEdgeIsBase = ((*fRangeSign)[iv] > 0);
      par[ip]   = lowEdgeIsBase ? cutMin[iv] : cutMax[iv];
      par[ip+1] = cutMax[iv] - cutMin[iv];
   }
}
//_______________________________________________________________________
// Signal and background efficiencies estimated as the product, over all
// variables, of the PDF integrals inside the respective cut windows.
void TMVA::MethodCuts::GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
                                        Double_t& effS, Double_t& effB )
{
   effS = effB = 1.0;
   for (Int_t iv=0; iv<GetNvar(); iv++) {
      effS *= (*fVarPdfS)[iv]->GetIntegral( cutMin[iv], cutMax[iv] );
      effB *= (*fVarPdfB)[iv]->GetIntegral( cutMin[iv], cutMax[iv] );
   }
}
//_______________________________________________________________________
// Compute signal and background efficiencies by counting the (weighted)
// events inside the cut box, using the binary search trees.
void TMVA::MethodCuts::GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
                                             Double_t& effS, Double_t& effB)
{
   Float_t nTotS = 0, nTotB = 0;
   Float_t nSelS = 0, nSelB = 0;

   // stack-allocated volume (RAII) instead of new/delete: cannot leak if
   // SearchVolume throws
   Volume volume( cutMin, cutMax, GetNvar() );
   nSelS = fBinaryTreeS->SearchVolume( &volume );
   nSelB = fBinaryTreeB->SearchVolume( &volume );

   nTotS = fBinaryTreeS->GetSumOfWeights();
   nTotB = fBinaryTreeB->GetSumOfWeights();

   // sanity check: no events at all is fatal
   if (nTotS == 0 && nTotB == 0) {
      fLogger << kFATAL << "<GetEffsfromSelection> fatal error in zero total number of events:"
              << " nTotS, nTotB: " << nTotS << " " << nTotB << " ***" << Endl;
   }

   // guard against division by zero when one class is empty
   if (nTotS == 0 ) {
      effS = 0;
      effB = nSelB/nTotB;
      fLogger << kWARNING << "<ComputeEstimator> zero number of signal events" << Endl;
   }
   else if (nTotB == 0) {
      effB = 0;
      effS = nSelS/nTotS;
      fLogger << kWARNING << "<ComputeEstimator> zero number of background events" << Endl;
   }
   else {
      effS = nSelS/nTotS;
      effB = nSelB/nTotB;
   }
}
//_______________________________________________________________________
// Create per-variable signal and background PDFs (needed by the
// kUsePDFs efficiency-estimation method): project each variable from
// the training tree, smooth a copy, and build spline PDFs from it.
void TMVA::MethodCuts::CreateVariablePDFs( void )
{
   fVarHistS = new vector<TH1*>( GetNvar() );
   fVarHistB = new vector<TH1*>( GetNvar() );
   fVarHistS_smooth = new vector<TH1*>( GetNvar() );
   fVarHistB_smooth = new vector<TH1*>( GetNvar() );
   fVarPdfS = new vector<PDF*>( GetNvar() );
   fVarPdfB = new vector<PDF*>( GetNvar() );

   Int_t nsmooth = 0; // number of smoothing passes applied to the copies

   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      // --- signal: project the variable into a temporary histogram "h"
      // with fNbins bins ("type==1" selects signal; "goff" = no graphics)
      TString histTitle = (*fInputVars)[ivar] + " signal training";
      TString histName = (*fInputVars)[ivar] + "_sig";
      TString drawOpt = (*fInputVars)[ivar] + ">>h(";
      drawOpt += fNbins;
      drawOpt += ")";
      Data().GetTrainingTree()->Draw( drawOpt, "type==1", "goff" );
      // retrieve the projection created by Draw from the current directory
      (*fVarHistS)[ivar] = (TH1F*)gDirectory->Get("h");
      (*fVarHistS)[ivar]->SetName(histName);
      (*fVarHistS)[ivar]->SetTitle(histTitle);

      // smoothed copy used as PDF input
      (*fVarHistS_smooth)[ivar] = (TH1F*)(*fVarHistS)[ivar]->Clone();
      histTitle = (*fInputVars)[ivar] + " signal training smoothed ";
      histTitle += nsmooth;
      histTitle +=" times";
      histName = (*fInputVars)[ivar] + "_sig_smooth";
      (*fVarHistS_smooth)[ivar]->SetName(histName);
      (*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
      // NOTE(review): nsmooth is 0 here — verify TH1::Smooth(0) is a no-op
      (*fVarHistS_smooth)[ivar]->Smooth(nsmooth);

      // --- background: same procedure with "type==0"
      histTitle = (*fInputVars)[ivar] + " background training";
      histName = (*fInputVars)[ivar] + "_bgd";
      drawOpt = (*fInputVars)[ivar] + ">>h(";
      drawOpt += fNbins;
      drawOpt += ")";
      Data().GetTrainingTree()->Draw( drawOpt, "type==0", "goff" );
      (*fVarHistB)[ivar] = (TH1F*)gDirectory->Get("h");
      (*fVarHistB)[ivar]->SetName(histName);
      (*fVarHistB)[ivar]->SetTitle(histTitle);

      // smoothed copy used as PDF input
      (*fVarHistB_smooth)[ivar] = (TH1F*)(*fVarHistB)[ivar]->Clone();
      histTitle = (*fInputVars)[ivar]+" background training smoothed ";
      histTitle += nsmooth;
      histTitle +=" times";
      histName = (*fInputVars)[ivar]+"_bgd_smooth";
      (*fVarHistB_smooth)[ivar]->SetName(histName);
      (*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
      (*fVarHistB_smooth)[ivar]->Smooth(nsmooth);

      // build quadratic-spline PDFs from the smoothed histograms
      (*fVarPdfS)[ivar] = new PDF( (*fVarHistS_smooth)[ivar], PDF::kSpline2 );
      (*fVarPdfB)[ivar] = new PDF( (*fVarHistB_smooth)[ivar], PDF::kSpline2 );
   }
}
//_______________________________________________________________________
// Verify that the branch order of the training tree matches the booked
// input variables; bookkeeping branches ("type", "weight", "boostweight")
// are skipped. Returns kFALSE on any mismatch.
Bool_t TMVA::MethodCuts::SanityChecks( void )
{
   Bool_t isOK = kTRUE;

   TObjArrayIter branchIter( Data().GetTrainingTree()->GetListOfBranches(), kIterForward );
   Int_t ivar = -1;
   for (TBranch* branch = (TBranch*)branchIter.Next(); branch != 0;
        branch = (TBranch*)branchIter.Next()) {
      const TString branchName = branch->GetName();
      // skip the bookkeeping branches
      if (branchName == "type" || branchName == "weight" || branchName == "boostweight") continue;
      ivar++;
      if ((*fInputVars)[ivar] != branchName) {
         fLogger << kWARNING << "<SanityChecks> mismatch in variables" << Endl;
         isOK = kFALSE;
      }
   }
   return isOK;
}
//_______________________________________________________________________
// Write the optimised cuts to the weight stream. The layout written here
// is the contract read back by ReadWeightsFromStream().
void TMVA::MethodCuts::WriteWeightsToStream( ostream & o ) const
{
   // header: efficiency method and number of signal-efficiency bins
   o << "OptimisationMethod " << "nbins:" << endl;
   o << ((fEffMethod == kUseEventSelection) ? "Fit-EventSelection" :
         (fEffMethod == kUsePDFs) ? "Fit-PDF" : "Monte-Carlo") << " " ;
   o << fNbins << endl;

   // human-readable description of the table that follows
   o << "Below are the optimised cuts for " << GetNvar() << " variables:" << endl;
   o << "Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0]"
     << " ... cutMin[ivar=n-1] cutMax[ivar=n-1]" << endl;

   // one line per bin: bin number, bin centre (effS), background
   // efficiency, then the (min, max) cut pair of every variable
   for (Int_t ibin=0; ibin<fNbins; ibin++) {
      o << setw(4) << ibin+1 << " "
        << setw(8)<< fEffBvsSLocal->GetBinCenter ( ibin + 1 ) << " "
        << setw(8)<< fEffBvsSLocal->GetBinContent( ibin + 1 ) << " ";
      for (Int_t ivar=0; ivar<GetNvar(); ivar++)
         o <<setw(10)<< fCutMin[ivar][ibin] << " " << setw(10) << fCutMax[ivar][ibin] << " ";
      o << endl;
   }
}
//_______________________________________________________________________
// Read the optimised cuts from the weight stream (format produced by
// WriteWeightsToStream).
void TMVA::MethodCuts::ReadWeightsFromStream( istream& istr )
{
   TString dummy;
   UInt_t  dummyInt;

   // header: optimisation method and number of signal-efficiency bins
   istr >> dummy >> dummy;
   Int_t nbins;
   istr >> dummy >> nbins;

   // number of variables, embedded in the descriptive header line
   istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >> dummy ;
   if (dummyInt != Data().GetNVariables()) {
      fLogger << kFATAL << "<ReadWeightsFromStream> fatal error: mismatch "
              << "in number of variables: " << dummyInt << " != " << Data().GetNVariables() << Endl;
   }
   SetNvar(dummyInt);

   // fix: if the binning in the weight file differs from the one used at
   // construction time, the cut arrays must be reallocated — otherwise the
   // read loop below would write past the end of the old buffers
   if (nbins != fNbins) {
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         delete [] fCutMin[ivar];
         delete [] fCutMax[ivar];
         fCutMin[ivar] = new Double_t[nbins];
         fCutMax[ivar] = new Double_t[nbins];
      }
      fNbins = nbins;
   }

   if (fFitMethod == kUseMonteCarlo) {
      fLogger << kINFO << "Read cuts optimised using "<< fNRandCuts << " MC events" << Endl;
   }
   else if (fFitMethod == kUseGeneticAlgorithm) {
      fLogger << kINFO << "Read cuts optimised using Genetic Algorithm" << Endl;
   }
   else if (fFitMethod == kUseSimulatedAnnealing) {
      // fix: typo "Si,ulated" in the log message
      fLogger << kINFO << "Read cuts optimised using Simulated Annealing" << Endl;
   }
   else {
      fLogger << kWARNING << "unknown method: " << fFitMethod << Endl;
   }
   fLogger << kINFO << "in " << fNbins << " signal efficiency bins and for " << GetNvar() << " variables" << Endl;

   // skip the two human-readable header lines of the cut table
   char buffer[200];
   istr.getline(buffer,200);
   istr.getline(buffer,200);

   // (re)book the local effB-vs-effS histogram
   Int_t   tmpbin;
   Float_t tmpeffS, tmpeffB;
   if (fEffBvsSLocal!=0) delete fEffBvsSLocal;
   fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
                             TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );

   // read one line per bin: bin number, effS, effB, then the cut pairs
   for (Int_t ibin=0; ibin<fNbins; ibin++) {
      istr >> tmpbin >> tmpeffS >> tmpeffB;
      fEffBvsSLocal->SetBinContent( ibin+1, tmpeffB );
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         istr >> fCutMin[ivar][ibin] >> fCutMax[ivar][ibin];
      }
   }
   fEffSMin = fEffBvsSLocal->GetBinCenter(1);
   fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);
}
void TMVA::MethodCuts::WriteMonitoringHistosToFile( void ) const
{
fLogger << kINFO << "write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
fEffBvsSLocal->Write();
if (fEffMethod == kUsePDFs) {
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
(*fVarHistS)[ivar]->Write();
(*fVarHistB)[ivar]->Write();
(*fVarHistS_smooth)[ivar]->Write();
(*fVarHistB_smooth)[ivar]->Write();
(*fVarPdfS)[ivar]->GetPDFHist()->Write();
(*fVarPdfB)[ivar]->GetPDFHist()->Write();
}
}
}
//_______________________________________________________________________
// Return the signal efficiency on the training sample at the background
// efficiency given in theString (required format: "Efficiency:0.05").
// The efficiency histograms and interpolation spline are built lazily on
// the first call.
Double_t TMVA::MethodCuts::GetTrainingEfficiency( TString theString)
{
   TList* list = Tools::ParseFormatLine( theString );

   // sanity check the argument format
   if (list->GetSize() != 2) {
      fLogger << kFATAL << "<GetTrainingEfficiency> wrong number of arguments"
              << " in string: " << theString
              << " | required format, e.g., Efficiency:0.05" << Endl;
      delete list; // fix: the parsed list was leaked
      return -1;
   }

   // requested background efficiency; the list is no longer needed
   Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
   delete list; // fix: the parsed list was leaked on every call

   // first pass: build the training-efficiency histograms
   Bool_t firstPass = (NULL == fTrainEffBvsS || NULL == fTrainRejBvsS);
   if (firstPass) {
      // (re)create binary search trees on the training tree
      if (fBinaryTreeS != 0) delete fBinaryTreeS;
      if (fBinaryTreeB != 0) delete fBinaryTreeB;
      fBinaryTreeS = new BinarySearchTree();
      fBinaryTreeS->Fill( *this, Data().GetTrainingTree(), 1 );
      fBinaryTreeB = new BinarySearchTree();
      fBinaryTreeB->Fill( *this, Data().GetTrainingTree(), 0 );

      if (NULL != fTrainEffBvsS) delete fTrainEffBvsS;
      if (NULL != fTrainRejBvsS) delete fTrainRejBvsS;
      fTrainEffBvsS = new TH1F( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      fTrainRejBvsS = new TH1F( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );

      // evaluate the stored cuts in every signal-efficiency bin
      Double_t* tmpCutMin = new Double_t[GetNvar()];
      Double_t* tmpCutMax = new Double_t[GetNvar()];
      for (Int_t bini=1; bini<=fNbins; bini++) {
         for (Int_t ivar=0; ivar <GetNvar(); ivar++){
            tmpCutMin[ivar] = fCutMin[ivar][bini-1];
            tmpCutMax[ivar] = fCutMax[ivar][bini-1];
         }
         Double_t effS, effB;
         this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
         fTrainEffBvsS->SetBinContent( bini, effB );
         fTrainRejBvsS->SetBinContent( bini, 1.0-effB );
      }
      delete[] tmpCutMin;
      delete[] tmpCutMax;

      // smooth interpolation of the efficiency curve
      fGraphTrainEffBvsS = new TGraph( fTrainEffBvsS );
      fSplTrainEffBvsS = new TSpline1( "trainEffBvsS", fGraphTrainEffBvsS );
   }

   if (NULL == fSplTrainEffBvsS) return 0.0;

   // scan the spline for the point where effB crosses the reference value
   Double_t effS, effB, effS_ = 0, effB_ = 0;
   Int_t nbins_ = 1000;
   for (Int_t bini=1; bini<=nbins_; bini++) {
      effS = (bini - 0.5)/Float_t(nbins_);
      effB = fSplTrainEffBvsS->Eval( effS );
      if ((effB - effBref)*(effB_ - effBref) < 0) break; // sign change: crossing found
      effS_ = effS;
      effB_ = effB;
   }
   // interpolate between the last two scan points
   return 0.5*(effS + effS_);
}
//_______________________________________________________________________
// Compute the signal efficiency on the test sample. theString is either
// empty (then the area above the effB-vs-effS curve — the integrated
// background rejection — is returned) or of the form "Efficiency:0.05"
// (then the signal efficiency at that background efficiency is returned,
// with its binomial error in effSerr).
Double_t TMVA::MethodCuts::GetEfficiency( TString theString, TTree* theTree, Double_t& effSerr )
{
   (void)theTree; // unused: efficiencies are computed from the test tree held by Data()

   // parse the argument string
   TList* list = TMVA::Tools::ParseFormatLine( theString, ":" );
   Bool_t computeArea = kFALSE;
   if (!list || list->GetSize() < 2) computeArea = kTRUE; // empty string: compute area
   else if (list->GetSize() > 2) {
      fLogger << kFATAL << "<GetEfficiency> wrong number of arguments"
              << " in string: " << theString
              << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
      delete list; // fix: the parsed list was leaked
      return -1;
   }

   // extract the reference background efficiency (if given), then release
   // the parsed list (fix: previously leaked on every call)
   Float_t effBref = 0;
   if (!computeArea) effBref = atof( ((TObjString*)list->At(1))->GetString() );
   delete list;

   // first pass: fill the efficiency histograms from the test sample
   if (fEffBvsS == NULL || fRejBvsS == NULL) {
      // (re)create binary search trees on the test tree
      if (fBinaryTreeS!=0) delete fBinaryTreeS;
      if (fBinaryTreeB!=0) delete fBinaryTreeB;
      fBinaryTreeS = new BinarySearchTree();
      fBinaryTreeS->Fill( *this, Data().GetTestTree(), 1 );
      fBinaryTreeB = new BinarySearchTree();
      fBinaryTreeB->Fill( *this, Data().GetTestTree(), 0 );

      if (NULL != fEffBvsS)delete fEffBvsS;
      if (NULL != fRejBvsS)delete fRejBvsS;
      fEffBvsS = new TH1F( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      fRejBvsS = new TH1F( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );

      // evaluate the stored cuts in every signal-efficiency bin
      Double_t* tmpCutMin = new Double_t[GetNvar()];
      Double_t* tmpCutMax = new Double_t[GetNvar()];
      for (Int_t bini=1; bini<=fNbins; bini++) {
         for (Int_t ivar=0; ivar <GetNvar(); ivar++){
            tmpCutMin[ivar] = fCutMin[ivar][bini-1];
            tmpCutMax[ivar] = fCutMax[ivar][bini-1];
         }
         Double_t effS, effB;
         this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
         fEffBvsS->SetBinContent( bini, effB );
         fRejBvsS->SetBinContent( bini, 1.0-effB );
      }
      delete[] tmpCutMin;
      delete[] tmpCutMax;

      // smooth interpolation of the efficiency curve
      fGrapheffBvsS = new TGraph( fEffBvsS );
      fSpleffBvsS = new TSpline1( "effBvsS", fGrapheffBvsS );
   }

   if (NULL == fSpleffBvsS) return 0.0;

   Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
   Int_t nbins_ = 1000;

   if (computeArea) {
      // area above the effB-vs-effS curve: background rejection integrated
      // over the full signal-efficiency range
      Double_t integral = 0;
      for (Int_t bini=1; bini<=nbins_; bini++) {
         effS = (bini - 0.5)/Float_t(nbins_);
         effB = fSpleffBvsS->Eval( effS );
         integral += (1.0 - effB);
      }
      integral /= nbins_;
      return integral;
   }
   else {
      // scan the spline for the point where effB crosses the reference value
      for (Int_t bini=1; bini<=nbins_; bini++) {
         effS = (bini - 0.5)/Float_t(nbins_);
         effB = fSpleffBvsS->Eval( effS );
         if ((effB - effBref)*(effB_ - effBref) < 0) break; // sign change: crossing found
         effS_ = effS;
         effB_ = effB;
      }
      // interpolate between the last two scan points
      effS = 0.5*(effS + effS_);

      // binomial error on the signal efficiency
      effSerr = 0;
      if (Data().GetNEvtSigTest() > 0)
         effSerr = TMath::Sqrt( effS*(1.0 - effS)/Double_t(Data().GetNEvtSigTest()) );
      return effS;
   }
}
//_______________________________________________________________________
// Write the method-specific part of the standalone C++ response class:
// not available for the cuts method, so only a placeholder is emitted.
void TMVA::MethodCuts::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   fout << " // not implemented for class: \"" << className << "\"" << endl
        << "};" << endl;
}
//_______________________________________________________________________
// Print the help message for this method (short description, performance
// hints, and tuning advice). Fixes several typos in the user-facing text:
// "Genetics Algo-," -> "Genetic Algo-", "depreciated" -> "deprecated",
// "Apart form" -> "Apart from", and "Fsmart" -> "FSmart" to match the
// declared option value.
void TMVA::MethodCuts::GetHelpMessage() const
{
   fLogger << Endl;
   fLogger << Tools::Color("bold") << "--- Short description:" << Tools::Color("reset") << Endl;
   fLogger << Endl;
   fLogger << "The optimisation of rectangular cuts performed by TMVA maximises " << Endl;
   fLogger << "the background rejection at given signal efficiency, and scans " << Endl;
   fLogger << "over the full range of the latter quantity. Three optimisation" << Endl;
   fLogger << "methods are optional: Monte Carlo sampling (MC), a Genetic Algo-" << Endl;
   fLogger << "rithm (GA), and Simulated Annealing (SA - deprecated at present). " << Endl;
   fLogger << "GA is expected to perform best." << Endl;
   fLogger << Endl;
   fLogger << "The difficulty to find the optimal cuts strongly increases with" << Endl;
   fLogger << "the dimensionality (number of input variables) of the problem." << Endl;
   fLogger << "This behavior is due to the non-uniqueness of the solution space."<< Endl;
   fLogger << Endl;
   fLogger << Tools::Color("bold") << "--- Performance optimisation:" << Tools::Color("reset") << Endl;
   fLogger << Endl;
   fLogger << "If the dimensionality exceeds, say, 4 input variables, it is " << Endl;
   fLogger << "advisable to scrutinize the separation power of the variables," << Endl;
   fLogger << "and to remove the weakest ones. If some among the input variables" << Endl;
   fLogger << "can be described by a single cut (e.g., because signal tends to be" << Endl;
   fLogger << "larger than background), this can be indicated to MethodCuts via" << Endl;
   fLogger << "the \"FSmart\" options (see option string). Choosing this option" << Endl;
   fLogger << "reduces the number of requirements for the variable from 2 (min/max)" << Endl;
   fLogger << "to a single one (TMVA finds out whether it is to be interpreted as" << Endl;
   fLogger << "min or max)." << Endl;
   fLogger << Endl;
   fLogger << Tools::Color("bold") << "--- Performance tuning via configuration options:" << Tools::Color("reset") << Endl;
   fLogger << "" << Endl;
   fLogger << "Monte Carlo sampling:" << Endl;
   fLogger << "" << Endl;
   fLogger << "Apart from the \"FSmart\" option for the variables, the only way" << Endl;
   fLogger << "to improve the MC sampling is to increase the sampling rate. This" << Endl;
   fLogger << "is done via the configuration option \"MC_NRandCuts\". The execution" << Endl;
   fLogger << "time scales linearly with the sampling rate." << Endl;
   fLogger << "" << Endl;
   fLogger << "Genetic Algorithm:" << Endl;
   fLogger << "" << Endl;
   fLogger << "The algorithm terminates if no significant fitness increase has" << Endl;
   fLogger << "been achieved within the last \"nsteps\" steps of the calculation." << Endl;
   fLogger << "Wiggles in the ROC curve or constant background rejection of 1" << Endl;
   fLogger << "indicate that the GA failed to always converge at the true maximum" << Endl;
   fLogger << "fitness. In such a case, it is recommended to broaden the search " << Endl;
   fLogger << "by increasing the population size (\"popSize\") and to give the GA " << Endl;
   fLogger << "more time to find improvements by increasing the number of steps" << Endl;
   fLogger << "(\"nsteps\")" << Endl;
   fLogger << " -> increase \"popSize\" (at least >10 * number of variables)" << Endl;
   fLogger << " -> increase \"nsteps\"" << Endl;
   fLogger << "" << Endl;
}
// NOTE(review): the two lines below are the footer of the automatically
// generated HTML documentation page, not C++ code; they are commented out
// here so the file compiles, and preserved for reference.
// Last update: Thu Jan 17 08:59:06 2008
// This page has been automatically generated. If you have any comments or suggestions about the page layout send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.