//Begin_Html
/*
Virtual base Class for all MVA method
MethodBase hosts several specific evaluation methods
The kind of MVA that provides optimal performance in an analysis strongly
depends on the particular application. The evaluation factory provides a
number of numerical benchmark results to directly assess the performance
of the MVA training on the independent test sample. These are:
<ul>
<li> The <i>signal efficiency</i> at three representative background efficiencies
(which is 1 − rejection).</li>
<li> The <i>significance</I> of an MVA estimator, defined by the difference
between the MVA mean values for signal and background, divided by the
quadratic sum of their root mean squares.</li>
<li> The <i>separation</i> of an MVA <i>x</i>, defined by the integral
½∫(S(x) − B(x))<sup>2</sup>/(S(x) + B(x))dx, where
S(x) and B(x) are the signal and background distributions, respectively.
The separation is zero for identical signal and background MVA shapes,
and it is one for disjunctive shapes.
<li> <a name="mu_transform">
The average, ∫x μ(S(x))dx, of the signal μ-transform.
The μ-transform of an MVA denotes the transformation that yields
a uniform background distribution. In this way, the signal distributions
S(x) can be directly compared among the various MVAs. The stronger S(x)
peaks towards one, the better is the discrimination of the MVA. The
μ-transform is
<a href=http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html>documented here</a>.
</ul>
The MVA standard output also prints the linear correlation coefficients between
signal and background, which can be useful to eliminate variables that exhibit too
strong correlations.
*/
//End_Html
#include <string>
#include <fstream>
#include <stdlib.h>
#include "TROOT.h"
#include "TSystem.h"
#include "TObjString.h"
#include "TQObject.h"
#include "TSpline.h"
#include "TMatrix.h"
#include "TMath.h"
#include <stdexcept>
#ifndef ROOT_TMVA_MethodBase
#include "TMVA/MethodBase.h"
#endif
#ifndef ROOT_TMVA_Timer
#include "TMVA/Timer.h"
#endif
#ifndef ROOT_TMVA_Tools
#include "TMVA/Tools.h"
#endif
#ifndef ROOT_TMVA_RootFinder
#include "TMVA/RootFinder.h"
#endif
#ifndef ROOT_TMVA_PDF
#include "TMVA/PDF.h"
#endif
ClassImp(TMVA::MethodBase)
;
const Bool_t DEBUG_TMVA_MethodBase = kFALSE;
const Int_t MethodBase_MaxIterations_ = 200;
const Bool_t Use_Splines_for_Eff_ = kTRUE;
const int NBIN_HIST_PLOT = 100;
const int NBIN_HIST_HIGH = 10000;
const TString BC_blue = "\033[34m" ;
const TString EC__ = "\033[0m" ;
const TString BC_yellow = "\033[1;33m";
const TString BC_lgreen = "\033[1;32m";
TMVA::MethodBase::MethodBase( TString jobName,
TString methodTitle,
DataSet& theData,
TString theOption,
TDirectory* theBaseDir)
: IMethod(),
fData ( theData ),
fJobName ( jobName ),
fMethodTitle ( methodTitle ),
fOptions ( theOption ),
fBaseDir ( theBaseDir ),
fWeightFile ( "" ),
fWeightFileType (kTEXT),
fTrainEffS (0),
fTrainEffB (0),
fTrainEffBvsS (0),
fTrainRejBvsS (0),
fGraphTrainS (0),
fGraphTrainB (0),
fGraphTrainEffBvsS (0),
fSplTrainS (0),
fSplTrainB (0),
fSplTrainEffBvsS (0),
fUseDecorr (kFALSE),
fPreprocessingMethod (Types::kNone),
fVerbose (kFALSE),
fHelp (kFALSE),
fLooseOptionCheckingEnabled(kTRUE),
fSplRefS (0),
fSplRefB (0),
fLogger (this)
{
this->Init();
DeclareOptions();
for (Int_t corr=0; corr!=Types::kMaxPreprocessingMethod; corr++) {
fXminNorm[corr] = new Double_t[Data().GetNVariables()];
fXmaxNorm[corr] = new Double_t[Data().GetNVariables()];
}
for (UInt_t ivar=0; ivar<Data().GetNVariables(); ivar++) {
SetXmin(ivar, Data().GetXmin(ivar, Types::kNone), Types::kNone);
SetXmax(ivar, Data().GetXmax(ivar, Types::kNone), Types::kNone);
}
fFileExtension = "weights";
fFileDir = "weights";
gSystem->MakeDirectory( fFileDir );
}
TMVA::MethodBase::MethodBase( DataSet& theData,
TString weightFile,
TDirectory* theBaseDir )
: IMethod(),
fData ( theData ),
fJobName ( "" ),
fOptions ( "" ),
fBaseDir ( theBaseDir ),
fWeightFile ( weightFile ),
fWeightFileType( kTEXT ),
fUseDecorr ( kFALSE ),
fPreprocessingMethod(Types::kNone),
fVerbose ( kTRUE ),
fHelp ( kFALSE ),
fLogger (this)
{
this->Init();
DeclareOptions();
fXminNorm[0] = fXminNorm[1] = fXmaxNorm[0] = fXmaxNorm[1] = 0;
}
TMVA::MethodBase::~MethodBase( void )
{
}
namespace TMVA
{
Bool_t IsLastOption(TString& theOpt)
{
return (theOpt.First(':')<0);
}
void SeparateOptions(TString& theOpt, TList& loo)
{
while (theOpt.Length()>0) {
if (IsLastOption(theOpt)) {
loo.Add(new TObjString(theOpt));
theOpt = "";
}
else {
TString toSave = theOpt(0,theOpt.First(':'));
loo.Add(new TObjString(toSave.Data()));
theOpt = theOpt(theOpt.First(':')+1,theOpt.Length());
}
}
}
}
void TMVA::MethodBase::ParseOptions( Bool_t verbose )
{
if (verbose) {
fLogger << kINFO << "parsing option string: " << Endl;
fLogger << kINFO << "\"" << fOptions << "\"" << Endl;
}
TString theOpt(fOptions);
TList loo;
theOpt = theOpt.Strip(TString::kLeading, ':');
SeparateOptions(theOpt, loo);
TListIter decOptIt(&fListOfOptions);
TListIter setOptIt(&loo);
while (TObjString * os = (TObjString*) setOptIt()) {
TString s = os->GetString();
bool paramParsed = false;
if (s.Contains('=')) {
TString optname = s(0,s.First('=')); optname.ToLower();
TString optval = s(s.First('=')+1,s.Length());
OptionBase * decOpt = 0;
TListIter optIt(&fListOfOptions);
while ( (decOpt = (OptionBase*)optIt()) !=0) {
TString predOptName(decOpt->GetName());
predOptName.ToLower();
if (predOptName == optname) break;
}
if (decOpt!=0) {
if (decOpt->IsSet())
if (verbose)
fLogger << kWARNING << "value for option " << decOpt->GetName()
<< " was previously set to " << decOpt->GetValue() << Endl;
if (!decOpt->HasPreDefinedVal() || (decOpt->HasPreDefinedVal() && decOpt->IsPreDefinedVal(optval)) ) {
decOpt->SetValue(optval);
paramParsed = kTRUE;
}
else fLogger << kFATAL << "option " << decOpt->TheName()
<< " has no predefined value " << optval << Endl;
}
else fLogger << kFATAL << "option " << optname << " not found!" << Endl;
}
if (!paramParsed) {
bool hasNot = false;
if (s.BeginsWith("!")) { s.Remove(0,1); hasNot=true; }
TString optname(s);optname.ToLower();
OptionBase * decOpt = 0;
TListIter optIt(&fListOfOptions);
while ( (decOpt = (OptionBase*)optIt()) !=0) {
if (dynamic_cast<Option<bool>*>(decOpt)==0) continue;
TString predOptName(decOpt->GetName());
predOptName.ToLower();
if (predOptName == optname) break;
}
if (decOpt!=0) {
decOpt->SetValue(hasNot?"0":"1");
paramParsed = true;
}
else {
if (hasNot) {
fLogger << kFATAL << "negating a non-boolean variable " << decOpt->GetName()
<< ", please check the opions for method " << GetName() << Endl;
}
}
}
if (!paramParsed && LooseOptionCheckingEnabled()) {
decOptIt.Reset();
while (OptionBase * decOpt = (OptionBase*) decOptIt()) {
if (decOpt->Type()=="float" || decOpt->Type()=="bool" ) continue;
TString sT;
if (decOpt->HasPreDefinedVal() && decOpt->IsPreDefinedVal(s) ) {
paramParsed = decOpt->SetValue(s);
break;
}
}
}
if (!paramParsed) {
fLogger << kFATAL << "can not interpret option \"" << s << "\" for method "
<< GetName() << ", please check" << Endl;
}
}
PrintOptions();
}
void TMVA::MethodBase::Init()
{
fIsOK = kTRUE;
fNvar = Data().GetNVariables();
fMeanS = -1;
fMeanB = -1;
fRmsS = -1;
fRmsB = -1;
fNbins = NBIN_HIST_PLOT;
fNbinsH = NBIN_HIST_HIGH;
fRanking = NULL;
fHistS_plotbin = NULL;
fHistB_plotbin = NULL;
fHistS_highbin = NULL;
fHistB_highbin = NULL;
fEffS = NULL;
fEffB = NULL;
fEffBvsS = NULL;
fRejBvsS = NULL;
fHistBhatS = NULL;
fHistBhatB = NULL;
fHistMuS = NULL;
fHistMuB = NULL;
fTestvarPrefix = "MVA_";
fXminNorm[0] = 0;
fXminNorm[1] = 0;
fXminNorm[2] = 0;
fXmaxNorm[0] = 0;
fXmaxNorm[1] = 0;
fXmaxNorm[2] = 0;
fSignalReferenceCut = 0.5;
fPreprocessingType = Types::kSignal;
fInputVars = new vector<TString>;
for(UInt_t ivar=0; ivar<Data().GetNVariables(); ivar++)
fInputVars->push_back(Data().GetInternalVarName(ivar));
ResetThisBase();
}
void TMVA::MethodBase::DeclareOptions()
{
DeclareOptionRef(fUseDecorr, "D", "use-decorrelated-variables flag (for backward compatibility)");
DeclareOptionRef(fPreprocessingString="None", "Preprocess", "Variable Decorrelation Method");
AddPreDefVal(TString("None"));
AddPreDefVal(TString("Decorrelate"));
AddPreDefVal(TString("PCA"));
DeclareOptionRef(fPreprocessingTypeString="Signal", "PreprocessType", "Use signal or background for Preprocess");
AddPreDefVal(TString("Signal"));
AddPreDefVal(TString("Background"));
DeclareOptionRef(fVerbose, "V","verbose flag");
DeclareOptionRef(fHelp, "H","help flag");
}
void TMVA::MethodBase::ProcessOptions()
{
if (fPreprocessingString == "None") fPreprocessingMethod = Types::kNone;
else if (fPreprocessingString == "Decorrelate" ) fPreprocessingMethod = Types::kDecorrelated;
else if (fPreprocessingString == "PCA" ) fPreprocessingMethod = Types::kPCA;
else {
fLogger << kFATAL << "<ProcessOptions> preprocess parameter '"
<< fPreprocessingString << "' unknown." << Endl;
}
if ((fPreprocessingMethod == Types::kNone) && fUseDecorr) fPreprocessingMethod = Types::kDecorrelated;
if (fPreprocessingTypeString == "Signal") fPreprocessingType = Types::kSignal;
else if (fPreprocessingTypeString == "Background" ) fPreprocessingType = Types::kBackground;
else {
fLogger << kFATAL << "<ProcessOptions> preprocess type '"
<< fPreprocessingTypeString << "' unknown." << Endl;
}
if (Verbose()) fLogger.SetMinType( kVERBOSE );
if( GetPreprocessingMethod() == Types::kDecorrelated ) {
Types::EPreprocessingMethod c = Types::kDecorrelated;
Data().EnablePreprocess(Types::kDecorrelated);
if( Data().Preprocess(Types::kDecorrelated) ) {
for (UInt_t ivar=0; ivar<Data().GetNVariables(); ivar++) {
SetXmin(ivar, Data().GetXmin(ivar, c), c);
SetXmax(ivar, Data().GetXmax(ivar, c), c);
}
}
}
if( GetPreprocessingMethod() == Types::kPCA ) {
Types::EPreprocessingMethod c = Types::kPCA;
Data().EnablePreprocess(Types::kPCA);
if( Data().Preprocess(Types::kPCA) ) {
for (UInt_t ivar=0; ivar<Data().GetNVariables(); ivar++) {
SetXmin(ivar, Data().GetXmin(ivar, c), c);
SetXmax(ivar, Data().GetXmax(ivar, c), c);
}
}
}
}
void TMVA::MethodBase::TrainMethod()
{
BaseDir()->cd();
Train();
WriteStateToFile();
}
void TMVA::MethodBase::WriteStateToStream(std::ostream& o) const
{
o << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << endl << endl;
o << "Method : " << GetMethodName() << endl;
o << "Creator: " << gSystem->GetUserInfo()->fUser << endl;
o << "Date : "; TDatime *d = new TDatime; o << d->AsString() << endl; delete d;
o << "Host : " << gSystem->GetBuildNode() << endl;
o << "Dir : " << gSystem->Getenv("PWD") << endl;
o << "Training events: " << Data().GetNEvtTrain() << endl;
o << endl;
o << endl << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << endl << endl;
WriteOptionsToStream(o);
o << endl;
o << endl << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << endl << endl;
Data().WriteVarsToStream(o, GetPreprocessingMethod());
o << endl;
if (GetPreprocessingMethod() != Types::kNone) {
o << endl << "#MAT -*-*-*-*-*-*-*-*-* decorrelation matrix -*-*-*-*-*-*-*-*-*-" << endl;
Data().WriteCorrMatToStream(o);
o << endl;
}
o << endl << "#WGT -*-*-*-*-*-*-*-*-*-*-*-*- weights -*-*-*-*-*-*-*-*-*-*-*-*-" << endl << endl;
WriteWeightsToStream(o);
WriteMonitoringHistosToFile();
}
void TMVA::MethodBase::WriteStateToFile() const
{
if (GetWeightFileType()==kTEXT) {
TString fname(GetWeightFileName());
fLogger << kINFO << "creating weight file: " << BC_blue << fname << EC__ << Endl;
ofstream fout( fname );
if (!fout.good()) {
fLogger << kFATAL << "<WriteStateToFile> "
<< "unable to open output weight file: " << fname << Endl;
}
WriteStateToStream(fout);
fout.close();
}
}
void TMVA::MethodBase::ReadStateFromFile()
{
TString fname(GetWeightFileName());
fLogger << kINFO << "reading weight file: " << BC_blue << fname << EC__ << Endl;
if (GetWeightFileType()==kTEXT) {
ifstream fin( fname );
if (!fin.good()) {
fLogger << kFATAL << "<ReadStateFromFile> "
<< "unable to open input weight file: " << fname << Endl;
}
ReadStateFromStream(fin);
fin.close();
}
}
void TMVA::MethodBase::ReadStateFromStream( std::istream& fin )
{
char buf[512];
fin.getline(buf,512);
while (!TString(buf).BeginsWith("Method")) fin.getline(buf,512);
TString ls(buf);
Int_t idx1 = ls.First(':')+2; Int_t idx2 = ls.Index(' ',idx1)-idx1; if (idx2<0) idx2=ls.Length();
this->SetMethodName(ls(idx1,idx2));
fin.getline(buf,512);
while (!TString(buf).BeginsWith("#OPT")) fin.getline(buf,512);
ReadOptionsFromStream(fin);
ParseOptions(Verbose());
fin.getline(buf,512);
while (!TString(buf).BeginsWith("#VAR")) {
fin.getline(buf,512);
}
Data().ReadVarsFromStream(fin, GetPreprocessingMethod());
ProcessOptions();
if ( GetPreprocessingMethod() != Types::kNone ) {
fin.getline(buf,512);
while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
Data().ReadCorrMatFromStream(fin);
}
for (Int_t corr=0; corr!=Types::kMaxPreprocessingMethod; corr++) {
if (0 != fXminNorm[corr]) delete fXminNorm[corr];
if (0 != fXmaxNorm[corr]) delete fXmaxNorm[corr];
fXminNorm[corr] = new Double_t[Data().GetNVariables()];
fXmaxNorm[corr] = new Double_t[Data().GetNVariables()];
Types::EPreprocessingMethod c = (Types::EPreprocessingMethod) corr;
for(UInt_t ivar=0; ivar<Data().GetNVariables(); ivar++) {
SetXmin(ivar, Data().GetXmin(ivar, c), c);
SetXmax(ivar, Data().GetXmax(ivar, c), c);
}
}
fin.getline(buf,512);
while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
fin.getline(buf,512);
ReadWeightsFromStream(fin);
}
Double_t TMVA::MethodBase::GetEventValNormalized(Int_t ivar) const
{
return Tools::NormVariable( Data().Event().GetVal(ivar),
GetXmin(ivar, GetPreprocessingMethod()),
GetXmax(ivar, GetPreprocessingMethod()));
}
TDirectory * TMVA::MethodBase::BaseDir( void ) const
{
if (fBaseDir != 0) return fBaseDir;
TDirectory* dir = 0;
TObject * o = Data().BaseRootDir()->FindObject(GetMethodTitle());
if (o!=0 && o->InheritsFrom("TDirectory")) dir = (TDirectory*)o;
if (dir != 0) return dir;
return Data().BaseRootDir()->mkdir(GetMethodTitle());
}
void TMVA::MethodBase::SetWeightFileName( TString theWeightFile)
{
fWeightFile = theWeightFile;
}
TString TMVA::MethodBase::GetWeightFileName() const
{
if (fWeightFile!="") return fWeightFile;
TString suffix = "";
TString weightFileName = fFileDir + "/" + fJobName + "_" + fMethodTitle + suffix + "." + fFileExtension;
if (GetWeightFileType()==kROOT) weightFileName += ".root";
if (GetWeightFileType()==kTEXT) weightFileName += ".txt";
return weightFileName;
}
Bool_t TMVA::MethodBase::CheckSanity( TTree* theTree )
{
TTree* tree = (0 != theTree) ? theTree : Data().GetTrainingTree();
for (Int_t i=0; i<GetNvar(); i++)
if (0 == tree->FindBranch( GetInputVar(i) )) return kFALSE;
return kTRUE;
}
void TMVA::MethodBase::SetWeightFileDir( TString fileDir )
{
fFileDir = fileDir;
gSystem->MakeDirectory( fFileDir );
}
Double_t TMVA::MethodBase::Norm( TString var, Double_t x ) const
{
return TMVA::Tools::NormVariable( x, GetXmin( var ), GetXmax( var ) );
}
Double_t TMVA::MethodBase::Norm( Int_t ivar, Double_t x ) const
{
return TMVA::Tools::NormVariable( x, GetXmin( ivar ), GetXmax( ivar ) );
}
void TMVA::MethodBase::TestInit(TTree* theTestTree)
{
if (theTestTree == 0)
theTestTree = Data().GetTestTree();
fHistS_plotbin = fHistB_plotbin = 0;
fHistS_highbin = fHistB_highbin = 0;
fEffS = fEffB = fEffBvsS = fRejBvsS = 0;
fGraphS = fGraphB = 0;
fCutOrientation = kPositive;
fSplS = fSplB = 0;
fSplRefS = fSplRefB = 0;
if (0 == theTestTree) {
fLogger << kFALSE << "<TestInit> test tree has zero pointer " << Endl;
fIsOK = kFALSE;
}
if ( 0 == theTestTree->FindBranch( GetTestvarName() ) && !(GetMethodName().Contains("Cuts"))) {
fLogger << kFALSE << "<TestInit> test variable " << GetTestvarName()
<< " not found in tree" << Endl;
fIsOK = kFALSE;
}
}
void TMVA::MethodBase::PrepareEvaluationTree( TTree* testTree )
{
if (0 == testTree) testTree = Data().GetTestTree();
if (!CheckSanity( testTree )) {
fLogger << kFALSE << "<PrepareEvaluationTree> sanity check failed" << Endl;
}
this->ReadStateFromFile();
TMVA::Timer timer( testTree->GetEntries(), GetName(), kTRUE );
Data().BaseRootDir()->cd();
Double_t myMVA = 0;
TBranch *newBranch = testTree->Branch( GetTestvarName(), &myMVA, GetTestvarName() + "/D", 128000 );
for (Int_t ievt=0; ievt<testTree->GetEntries(); ievt++) {
if ((Int_t)ievt%100 == 0) timer.DrawProgressBar( ievt );
ReadTestEvent(ievt);
newBranch->SetAddress(&myMVA);
myMVA = GetMvaValue();
newBranch->Fill();
}
Data().BaseRootDir()->Write("",TObject::kOverwrite);
fLogger << kINFO << "elapsed time for evaluation of "
<< testTree->GetEntries() << " events: "
<< timer.GetElapsedTime() << " " << Endl;
newBranch->ResetAddress();
}
void TMVA::MethodBase::Test( TTree *theTestTree )
{
if (theTestTree == 0) theTestTree = Data().GetTestTree();
Double_t meanS, meanB, rmsS, rmsB, xmin, xmax;
TMVA::Tools::ComputeStat( theTestTree, GetTestvarName(), meanS, meanB, rmsS, rmsB, xmin, xmax );
Double_t nrms = 4;
xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax );
fMeanS = meanS; fMeanB = meanB;
fRmsS = rmsS; fRmsB = rmsB;
fXmin = xmin; fXmax = xmax;
fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;
fHistS_plotbin = TMVA::Tools::projNormTH1F( theTestTree, GetTestvarName(),
GetTestvarName() + "_S",
fNbins, fXmin, fXmax, "weight*(type == 1)" );
fHistB_plotbin = TMVA::Tools::projNormTH1F( theTestTree, GetTestvarName(),
GetTestvarName() + "_B",
fNbins, fXmin, fXmax, "weight*(type == 0)" );
fHistS_highbin = TMVA::Tools::projNormTH1F( theTestTree, GetTestvarName(),
GetTestvarName() + "_S_high",
fNbinsH, fXmin, fXmax, "weight*(type == 1)" );
fHistB_highbin = TMVA::Tools::projNormTH1F( theTestTree, GetTestvarName(),
GetTestvarName() + "_B_high",
fNbinsH, fXmin, fXmax, "weight*(type == 0)" );
fSplS = new TMVA::PDF( fHistS_plotbin, TMVA::PDF::kSpline2, 0 );
fSplB = new TMVA::PDF( fHistB_plotbin, TMVA::PDF::kSpline2, 0 );
}
Double_t TMVA::MethodBase::GetEfficiency( TString theString, TTree *theTree )
{
TList* list = TMVA::Tools::ParseFormatLine( theString );
if (list->GetSize() != 2) {
fLogger << kFALSE << "<GetEfficiency> wrong number of arguments"
<< " in string: " << theString
<< " | required format, e.g., Efficiency:0.05" << Endl;
return -1;
}
Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
if (DEBUG_TMVA_MethodBase)
fLogger << kINFO << "<GetEfficiency> compute eff(S) at eff(B) = " << effBref << Endl;
if (fHistS_highbin->GetNbinsX() != fHistB_highbin->GetNbinsX() ||
fHistS_plotbin->GetNbinsX() != fHistB_plotbin->GetNbinsX()) {
fLogger << kWARNING << "<GetEfficiency> binning mismatch between signal and background histos" << Endl;
fIsOK = kFALSE;
return -1.0;
}
Double_t xmin = fHistS_highbin->GetXaxis()->GetXmin();
Double_t xmax = fHistS_highbin->GetXaxis()->GetXmax();
Bool_t firstPass = kFALSE;
if (NULL == fEffS && NULL == fEffB) firstPass = kTRUE;
if (firstPass) {
fEffS = new TH1F( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
fEffB = new TH1F( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
Int_t theType;
Double_t theVal;
TBranch* brType = theTree->GetBranch("type");
TBranch* brVal = theTree->GetBranch(GetTestvarName());
if (brVal == 0) {
fLogger << kFALSE << "could not find variable "
<< GetTestvarName() << " in tree " << theTree->GetName() << Endl;
}
brType->SetAddress(&theType);
brVal ->SetAddress(&theVal );
for (Int_t ievt=0; ievt<theTree->GetEntries(); ievt++) {
brType->GetEntry(ievt);
brVal ->GetEntry(ievt);
TH1* theHist = (theType==1?fEffS:fEffB);
for (Int_t bin=1; bin<=fNbinsH; bin++)
if (sign*theVal >= sign*theHist->GetBinLowEdge( bin )) theHist->AddBinContent( bin );
}
fEffS->Scale( 1.0/(fEffS->GetMaximum() > 0 ? fEffS->GetMaximum() : 1) );
fEffB->Scale( 1.0/(fEffB->GetMaximum() > 0 ? fEffB->GetMaximum() : 1) );
fEffBvsS = new TH1F( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
fRejBvsS = new TH1F( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
if (Use_Splines_for_Eff_) {
fGraphS = new TGraph( fEffS );
fGraphB = new TGraph( fEffB );
fSplRefS = new TMVA::TSpline1( "spline2_signal", fGraphS );
fSplRefB = new TMVA::TSpline1( "spline2_background", fGraphB );
fLogger << kVERBOSE << "<GetEfficiency> verify signal and background eff. splines" << Endl;
TMVA::Tools::CheckSplines( fEffS, fSplRefS );
TMVA::Tools::CheckSplines( fEffB, fSplRefB );
}
ResetThisBase();
TMVA::RootFinder rootFinder(&IGetEffForRoot, fXmin, fXmax );
Double_t effB = 0;
for (Int_t bini=1; bini<=fNbins; bini++) {
Double_t effS = fEffBvsS->GetBinCenter( bini );
Double_t cut = rootFinder.Root( effS );
if (Use_Splines_for_Eff_)
effB = fSplRefB->Eval( cut );
else
effB = fEffB->GetBinContent( fEffB->FindBin( cut ) );
fEffBvsS->SetBinContent( bini, effB );
fRejBvsS->SetBinContent( bini, 1.0-effB );
}
fGrapheffBvsS = new TGraph( fEffBvsS );
fSpleffBvsS = new TMVA::TSpline1( "effBvsS", fGrapheffBvsS );
}
if (NULL == fSpleffBvsS) return 0.0;
Double_t effS, effB, effS_ = 0, effB_ = 0;
Int_t nbins_ = 1000;
for (Int_t bini=1; bini<=nbins_; bini++) {
effS = (bini - 0.5)/Float_t(nbins_);
effB = fSpleffBvsS->Eval( effS );
if ((effB - effBref)*(effB_ - effBref) < 0) break;
effS_ = effS;
effB_ = effB;
}
return 0.5*(effS + effS_);
}
Double_t TMVA::MethodBase::GetTrainingEfficiency( TString theString)
{
TList* list = TMVA::Tools::ParseFormatLine( theString );
if (list->GetSize() != 2) {
fLogger << kFATAL << "<GetTrainingEfficiency> wrong number of arguments"
<< " in string: " << theString
<< " | required format, e.g., Efficiency:0.05" << Endl;
return -1;
}
Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
if (DEBUG_TMVA_MethodBase)
fLogger << kINFO << "<GetTrainingEfficiency> compute eff(S) at eff(B) = "
<< effBref << Endl;
if (fHistS_highbin->GetNbinsX() != fHistB_highbin->GetNbinsX() ||
fHistS_plotbin->GetNbinsX() != fHistB_plotbin->GetNbinsX()) {
fLogger << kFATAL << "<GetTrainingEfficiency> binning mismatch between signal and background histos"
<< Endl;
fIsOK = kFALSE;
return -1.0;
}
Double_t xmin = fHistS_highbin->GetXaxis()->GetXmin();
Double_t xmax = fHistS_highbin->GetXaxis()->GetXmax();
Bool_t firstPass = kFALSE;
if (NULL == fTrainEffS && NULL == fTrainEffB) firstPass = kTRUE;
if (firstPass) {
fTrainEffS = new TH1F( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
fNbinsH, xmin, xmax );
fTrainEffB = new TH1F( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
fNbinsH, xmin, xmax );
Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
for (Int_t ievt=0; ievt<Data().GetNEvtTrain(); ievt++) {
ReadTrainingEvent(ievt);
TH1* theHist = (Data().Event().IsSignal() ? fTrainEffS : fTrainEffB);
Double_t theVal = this->GetMvaValue();
for (Int_t bin=1; bin<=fNbinsH; bin++)
if (sign*theVal > sign*theHist->GetBinCenter( bin )) theHist->AddBinContent( bin );
}
fTrainEffS->Scale( 1.0/(fTrainEffS->GetMaximum() > 0 ? fTrainEffS->GetMaximum() : 1) );
fTrainEffB->Scale( 1.0/(fTrainEffB->GetMaximum() > 0 ? fTrainEffB->GetMaximum() : 1) );
fTrainEffBvsS = new TH1F( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
fTrainRejBvsS = new TH1F( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
if (Use_Splines_for_Eff_) {
fGraphTrainS = new TGraph( fTrainEffS );
fGraphTrainB = new TGraph( fTrainEffB );
fSplTrainRefS = new TMVA::TSpline1( "spline2_signal", fGraphTrainS );
fSplTrainRefB = new TMVA::TSpline1( "spline2_background", fGraphTrainB );
fLogger << kVERBOSE << "<GetEfficiency> verify signal and background eff. splines" << Endl;
TMVA::Tools::CheckSplines( fTrainEffS, fSplTrainRefS );
TMVA::Tools::CheckSplines( fTrainEffB, fSplTrainRefB );
}
ResetThisBase();
TMVA::RootFinder rootFinder(&IGetEffForRoot, fXmin, fXmax );
Double_t effB = 0;
for (Int_t bini=1; bini<=fNbins; bini++) {
Double_t effS = fTrainEffBvsS->GetBinCenter( bini );
Double_t cut = rootFinder.Root( effS );
if (Use_Splines_for_Eff_)
effB = fSplTrainRefB->Eval( cut );
else
effB = fTrainEffB->GetBinContent( fTrainEffB->FindBin( cut ) );
fTrainEffBvsS->SetBinContent( bini, effB );
fTrainRejBvsS->SetBinContent( bini, 1.0-effB );
}
fGraphTrainEffBvsS = new TGraph( fTrainEffBvsS );
fSplTrainEffBvsS = new TMVA::TSpline1( "effBvsS", fGraphTrainEffBvsS );
}
if (NULL == fSplTrainEffBvsS) return 0.0;
Double_t effS, effB, effS_ = 0, effB_ = 0;
Int_t nbins_ = 1000;
for (Int_t bini=1; bini<=nbins_; bini++) {
effS = (bini - 0.5)/Float_t(nbins_);
effB = fSplTrainEffBvsS->Eval( effS );
if ((effB - effBref)*(effB_ - effBref) < 0) break;
effS_ = effS;
effB_ = effB;
}
return 0.5*(effS + effS_);
}
Double_t TMVA::MethodBase::GetSignificance( void )
{
Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
}
Double_t TMVA::MethodBase::GetSeparation( void )
{
Double_t separation = 0;
Int_t nstep = 1000;
Double_t intBin = (fXmax - fXmin)/nstep;
for (Int_t bin=0; bin<nstep; bin++) {
Double_t x = (bin + 0.5)*intBin + fXmin;
Double_t s = fSplS->GetVal( x );
Double_t b = fSplB->GetVal( x );
if (s + b > 0) separation += 0.5*(s - b)*(s - b)/(s + b);
}
separation *= intBin;
return separation;
}
Double_t TMVA::MethodBase::GetOptimalSignificance(Double_t SignalEvents,
Double_t BackgroundEvents,
Double_t& optimal_significance_value ) const
{
fLogger << kVERBOSE << "get optimal significance ..." << Endl;
Double_t optimal_significance(0);
Double_t effS(0),effB(0),significance(0);
TH1F *temp_histogram = new TH1F("temp", "temp", fNbinsH, fXmin, fXmax );
if (SignalEvents <= 0 || BackgroundEvents <= 0) {
fLogger << kFATAL << "<GetOptimalSignificance> "
<< "number of signal or background events is <= 0 ==> abort"
<< Endl;
}
fLogger << kINFO << "using ratio SignalEvents/BackgroundEvents = "
<< SignalEvents/BackgroundEvents << Endl;
if ((fEffS == 0) || (fEffB == 0)) {
fLogger << kWARNING << "efficiency histograms empty !" << Endl;
fLogger << kWARNING << "no optimal cut found, return 0" << Endl;
return 0;
}
for (Int_t bin=1; bin<=fNbinsH; bin++) {
effS = fEffS->GetBinContent( bin );
effB = fEffB->GetBinContent( bin );
significance = sqrt(SignalEvents) * ( effS )/sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
temp_histogram->SetBinContent(bin,significance);
}
optimal_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
optimal_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
temp_histogram->Delete();
fLogger << kINFO << "optimal cut at : " << optimal_significance << Endl;
fLogger << kINFO << "optimal significance: " << optimal_significance_value << Endl;
return optimal_significance;
}
Double_t TMVA::MethodBase::GetmuTransform( TTree *theTree )
{
Int_t nbin = 70;
fHistBhatS = new TH1F( GetTestvarName() + "_BhatS", GetTestvarName() + ": Bhat (S)", nbin, 0.0, 1.0 );
fHistBhatB = new TH1F( GetTestvarName() + "_BhatB", GetTestvarName() + ": Bhat (B)", nbin, 0.0, 1.0 );
fHistBhatS->Sumw2();
fHistBhatB->Sumw2();
vector<Double_t>* aBhatB = new vector<Double_t>;
vector<Double_t>* aBhatS = new vector<Double_t>;
Double_t x;
TBranch * br = theTree->GetBranch(GetTestvarName());
for (Int_t ievt=0; ievt<theTree->GetEntries(); ievt++) {
Data().ReadEvent(theTree,ievt);
br->SetAddress(&x);
br->GetEvent(ievt);
Double_t s = fSplS->GetVal( x );
Double_t b = fSplB->GetVal( x );
Double_t aBhat = 0;
if (b + s > 0) aBhat = b/(b + s);
if (Data().Event().IsSignal()) {
aBhatS->push_back ( aBhat );
fHistBhatS->Fill( aBhat );
}
else {
aBhatB->push_back ( aBhat );
fHistBhatB->Fill( aBhat );
}
}
fHistBhatS->Scale( 1.0/((fHistBhatS->GetEntries() > 0 ? fHistBhatS->GetEntries() : 1) / nbin) );
fHistBhatB->Scale( 1.0/((fHistBhatB->GetEntries() > 0 ? fHistBhatB->GetEntries() : 1) / nbin) );
TMVA::PDF* yB = new TMVA::PDF( fHistBhatB, TMVA::PDF::kSpline2, 100 );
Int_t nevtS = aBhatS->size();
Int_t nevtB = aBhatB->size();
Int_t nbinMu = 50;
fHistMuS = new TH1F( GetTestvarName() + "_muTransform_S",
GetTestvarName() + ": mu-Transform (S)", nbinMu, 0.0, 1.0 );
fHistMuB = new TH1F( GetTestvarName() + "_muTransform_B",
GetTestvarName() + ": mu-Transform (B)", nbinMu, 0.0, 1.0 );
for (Int_t ievt=0; ievt<nevtS; ievt++) {
Double_t w = yB->GetVal( (*aBhatS)[ievt] );
if (w > 0) fHistMuS->Fill( 1.0 - (*aBhatS)[ievt], 1.0/w );
}
for (Int_t ievt=0; ievt<nevtB; ievt++) {
Double_t w = yB->GetVal( (*aBhatB)[ievt] );
if (w > 0) fHistMuB->Fill( 1.0 - (*aBhatB)[ievt], 1.0/w );
}
TMVA::Tools::NormHist( fHistMuS );
TMVA::Tools::NormHist( fHistMuB );
TMVA::PDF* thePdf = new TMVA::PDF( fHistMuS, TMVA::PDF::kSpline2 );
Double_t intS = 0;
Int_t nstp = 10000;
for (Int_t istp=0; istp<nstp; istp++) {
Double_t x = (istp + 0.5)/Double_t(nstp);
intS += x*thePdf->GetVal( x );
}
intS /= Double_t(nstp);
delete yB;
delete thePdf;
delete aBhatB;
delete aBhatS;
return intS;
}
void TMVA::MethodBase::Statistics( TMVA::Types::ETreeType treeType, const TString& theVarName,
Double_t& meanS, Double_t& meanB,
Double_t& rmsS, Double_t& rmsB,
Double_t& xmin, Double_t& xmax,
Bool_t norm )
{
Long64_t entries = ( (treeType == TMVA::Types::kTesting ) ? Data().GetNEvtTest() :
(treeType == TMVA::Types::kTraining) ? Data().GetNEvtTrain() : -1 );
if (entries <=0)
fLogger << kFATAL << "<CalculateEstimator> wrong tree type: " << treeType << Endl;
UInt_t varIndex = Data().FindVar( theVarName );
Double_t* varVecS = new Double_t[entries];
Double_t* varVecB = new Double_t[entries];
xmin = +1e20;
xmax = -1e20;
Long64_t nEventsS = -1;
Long64_t nEventsB = -1;
for (Int_t i = 0; i < entries; i++) {
if (treeType == TMVA::Types::kTesting )
ReadTestEvent(i);
else
ReadTrainingEvent(i);
Double_t theVar = (norm) ? GetEventValNormalized(varIndex) : GetEventVal(varIndex);
if (Data().Event().IsSignal()) varVecS[++nEventsS] = theVar;
else varVecB[++nEventsB] = theVar;
xmin = TMath::Min( xmin, theVar );
xmax = TMath::Max( xmax, theVar );
}
++nEventsS;
++nEventsB;
meanS = TMath::Mean( nEventsS, varVecS );
meanB = TMath::Mean( nEventsB, varVecB );
rmsS = TMath::RMS ( nEventsS, varVecS );
rmsB = TMath::RMS ( nEventsB, varVecB );
delete [] varVecS;
delete [] varVecB;
}
void TMVA::MethodBase::WriteEvaluationHistosToFile( TDirectory* targetDir )
{
targetDir->cd();
if (0 != fHistS_plotbin) fHistS_plotbin->Write();
if (0 != fHistB_plotbin) fHistB_plotbin->Write();
if (0 != fHistS_highbin) fHistS_highbin->Write();
if (0 != fHistB_highbin) fHistB_highbin->Write();
if (0 != fEffS ) fEffS->Write();
if (0 != fEffB ) fEffB->Write();
if (0 != fEffBvsS ) fEffBvsS->Write();
if (0 != fRejBvsS ) fRejBvsS->Write();
if (0 != fHistBhatS ) fHistBhatS->Write();
if (0 != fHistBhatB ) fHistBhatB->Write();
if (0 != fHistMuS ) fHistMuS->Write();
if (0 != fHistMuB ) fHistMuB->Write();
}
TMVA::MethodBase* TMVA::MethodBase::fgThisBase = NULL;
Double_t TMVA::MethodBase::IGetEffForRoot( Double_t theCut )
{
return TMVA::MethodBase::GetThisBase()->GetEffForRoot( theCut );
}
Double_t TMVA::MethodBase::GetEffForRoot( Double_t theCut )
{
Double_t retval=0;
if (Use_Splines_for_Eff_) {
retval = fSplRefS->Eval( theCut );
}
else
retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
Double_t eps = 1.0e-5;
if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
return retval;
}
void TMVA::MethodBase::PrintOptions() const
{
fLogger << kINFO << "the following options are set:" << Endl;
TListIter optIt( & ListOfOptions() );
fLogger << kINFO << "by User:" << Endl;
fLogger << kINFO << "--------" << Endl;
while (OptionBase * opt = (OptionBase *) optIt()) {
if (opt->IsSet()) { fLogger << kINFO << " "; opt->Print(fLogger); fLogger << Endl; }
}
optIt.Reset();
fLogger << kINFO << "default:" << Endl;
fLogger << kINFO << "--------" << Endl;
while (OptionBase * opt = (OptionBase *) optIt()) {
if (!opt->IsSet()) { fLogger << kINFO << " "; opt->Print(fLogger); fLogger << Endl; }
}
}
void TMVA::MethodBase::WriteOptionsToStream(ostream& o) const
{
TListIter optIt( & ListOfOptions() );
o << "# Set by User:" << endl;
while (OptionBase * opt = (OptionBase *) optIt()) if (opt->IsSet()) { opt->Print(o); o << endl; }
optIt.Reset();
o << "# Default:" << endl;
while (OptionBase * opt = (OptionBase *) optIt()) if (!opt->IsSet()) { opt->Print(o); o << endl; }
o << "##" << endl;
}
void TMVA::MethodBase::ReadOptionsFromStream(istream& istr)
{
fOptions = "";
char buf[512];
istr.getline(buf,512);
TString stropt, strval;
while (istr.good() && !istr.eof() && !(buf[0]=='#' && buf[1]=='#')) {
char *p = buf;
while (*p==' ' || *p=='\t') p++;
if (*p=='#' || *p=='\0') {
istr.getline(buf,512);
continue;
}
std::stringstream sstr(buf);
sstr >> stropt >> strval;
stropt.ReplaceAll(':','=');
strval.ReplaceAll("\"","");
if (fOptions.Length()!=0) fOptions += ":";
fOptions += stropt;
fOptions += strval;
istr.getline(buf,512);
}
}
void TMVA::MethodBase::WriteMonitoringHistosToFile( void ) const
{
fLogger << kINFO << "no monitoring histograms written" << Endl;
}
ROOT page - Class index - Class Hierarchy - Top of the page
This page has been automatically generated. If you have any comments or suggestions about the page layout send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.