179 "Number of times the classifier is boosted" );
182 "Write monitoring histograms for each boosted classifier" );
185 "Produce histograms for detailed boost monitoring" );
195 "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
198 "Type of transform applied to every boosted method linear, log, step" );
205 "Seed for random number generator used for bagging" );
221 "How to set the final weight of the boosted classifiers" );
229 "Type of transform applied to every boosted method linear, log, step" );
243 "Recalculate the classifier MVA Signallike cut at every boost iteration" );
280 results->
Store(
new TH1F(
"ROCIntegral_test",
"ROC integral of single classifier (testing sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegral_test");
281 results->
Store(
new TH1F(
"ROCIntegralBoosted_test",
"ROC integral of boosted method (testing sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegralBoosted_test");
282 results->
Store(
new TH1F(
"ROCIntegral_train",
"ROC integral of single classifier (training sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegral_train");
283 results->
Store(
new TH1F(
"ROCIntegralBoosted_train",
"ROC integral of boosted method (training sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegralBoosted_train");
346 Log() << kDEBUG <<
"CheckSetup: trying to repair things" <<
Endl;
361 if (
Data()->GetNTrainingEvents()==0)
Log() << kFATAL <<
"<Train> Data() has zero events" <<
Endl;
379 if (varTrafoStart >0) {
381 if (varTrafoEnd<varTrafoStart)
401 Log() << kFATAL <<
"uups.. guess the booking of the " <<
fCurrentMethodIdx <<
"-th classifier somehow failed" <<
Endl;
409 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" <<
Endl;
485 if (StopCounter > 0 &&
fBoostType !=
"Bagging") {
488 Log() << kINFO <<
"Error rate has reached 0.5 ("<<
fMethodError<<
"), boosting process stopped at #" <<
fBoostNum <<
" classifier" <<
Endl;
490 Log() << kINFO <<
"The classifier might be too strong to boost with Beta = " <<
fAdaBoostBeta <<
", try reducing it." <<
Endl;
512 TH1F* tmp =
dynamic_cast<TH1F*
>( results->
GetHist(
"ClassifierWeight") );
540 if (
fBoostNum <=0)
Log() << kFATAL <<
"CreateHistograms called before fBoostNum is initialized" <<
Endl;
544 Int_t signalClass = 0;
545 if (
DataInfo().GetClassInfo(
"Signal") != 0) {
546 signalClass =
DataInfo().GetClassInfo(
"Signal")->GetNumber();
549 meanS, meanB, rmsS, rmsB,
xmin,
xmax, signalClass );
572 const Event *ev =
Data()->GetEvent(ievt);
621 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
626 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
645 for (
UInt_t imtd=0;imtd<nloop;imtd++) {
650 if (dir==0)
continue;
695 const Int_t nBins=10001;
698 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
702 if (val>maxMVA) maxMVA=val;
703 if (val<minMVA) minMVA=val;
705 maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
721 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
726 mvaS->Fill(mvaVal,weight);
728 mvaB->
Fill(mvaVal,weight);
764 for (
Int_t ibin=1;ibin<=nBins;ibin++){
775 if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
782 if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
783 else mvaCutOrientation=1;
816 <<
" s2="<<(sTot-sSelCut)
817 <<
" b2="<<(bTot-bSelCut)
818 <<
" s/b(1)=" << sSelCut/bSelCut
819 <<
" s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
820 <<
" index before cut=" << parentIndex
821 <<
" after: left=" << leftIndex
822 <<
" after: right=" << rightIndex
823 <<
" sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
824 <<
" sepGain="<<separationGain
828 <<
" cutOrientation="<<mvaCutOrientation
859 Log() << kFATAL <<
"<Boost> unknown boost option " <<
fBoostType<<
" called" <<
Endl;
870 Log() << kWARNING <<
" AdaBoost called without classifier reference - needed for calculating AdaBoost " <<
Endl;
879 if (discreteAdaBoost) {
920 if (discreteAdaBoost){
922 WrongDetection[ievt]=
kFALSE;
924 WrongDetection[ievt]=
kTRUE;
929 mvaProb = 2*(mvaProb-0.5);
931 if (
DataInfo().IsSignal(ev)) trueType = 1;
933 sumWrong+=
w*trueType*mvaProb;
945 Log() << kWARNING <<
"Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " <<
Endl;
948 if (discreteAdaBoost)
968 const Event* ev =
Data()->GetEvent(ievt);
970 if (discreteAdaBoost){
972 if (WrongDetection[ievt] && boostWeight != 0) {
983 mvaProb = 2*(mvaProb-0.5);
987 if (
DataInfo().IsSignal(ev)) trueType = 1;
990 boostfactor =
TMath::Exp(-1*boostWeight*trueType*mvaProb);
998 Double_t normWeight = oldSum/newSum;
1002 const Event* ev =
Data()->GetEvent(ievt);
1012 const Event* ev =
Data()->GetEvent(ievt);
1019 delete[] WrongDetection;
1020 if (MVAProb)
delete MVAProb;
1035 const Event* ev =
Data()->GetEvent(ievt);
1054 Log() <<
"This method combines several classifier of one species in a "<<
Endl;
1055 Log() <<
"single multivariate quantity via the boost algorithm." <<
Endl;
1056 Log() <<
"the output is a weighted sum over all individual classifiers" <<
Endl;
1057 Log() <<
"By default, the AdaBoost method is employed, which gives " <<
Endl;
1058 Log() <<
"events that were misclassified in the previous tree a larger " <<
Endl;
1059 Log() <<
"weight in the training of the following classifier."<<
Endl;
1060 Log() <<
"Optionally, Bagged boosting can also be applied." <<
Endl;
1064 Log() <<
"The most important parameter in the configuration is the "<<
Endl;
1065 Log() <<
"number of boosts applied (Boost_Num) and the choice of boosting"<<
Endl;
1066 Log() <<
"(Boost_Type), which can be set to either AdaBoost or Bagging." <<
Endl;
1067 Log() <<
"AdaBoosting: The most important parameters in this configuration" <<
Endl;
1068 Log() <<
"is the beta parameter (Boost_AdaBoostBeta) " <<
Endl;
1069 Log() <<
"When boosting a linear classifier, it is sometimes advantageous"<<
Endl;
1070 Log() <<
"to transform the MVA output non-linearly. The following options" <<
Endl;
1071 Log() <<
"are available: step, log, and minmax, the default is no transform."<<
Endl;
1073 Log() <<
"Some classifiers are hard to boost and do not improve much in"<<
Endl;
1074 Log() <<
"their performance by boosting them, some even slightly deteriorate"<<
Endl;
1075 Log() <<
"due to the boosting." <<
Endl;
1076 Log() <<
"The booking of the boost method is special since it requires"<<
Endl;
1077 Log() <<
"the booing of the method to be boosted and the boost itself."<<
Endl;
1078 Log() <<
"This is solved by booking the method to be boosted and to add"<<
Endl;
1079 Log() <<
"all Boost parameters, which all begin with \"Boost_\" to the"<<
Endl;
1080 Log() <<
"options string. The factory separates the options and initiates"<<
Endl;
1081 Log() <<
"the boost process. The TMVA macro directory contains the example"<<
Endl;
1082 Log() <<
"macro \"Boost.C\"" <<
Endl;
1105 Double_t sigcut =
m->GetSignalReferenceCut();
1112 if (val < sigcut) val = sigcut;
1117 if (
m->IsSignalLike(val)) val = 1.;
1159 Data()->SetCurrentType(eTT);
1165 if (singleMethod && !method) {
1166 Log() << kFATAL <<
" What do you do? Your method:"
1168 <<
" seems not to be a propper TMVA method"
1178 if (!singleMethod) {
1184 if (AllMethodsWeight != 0.0) {
1192 std::vector <Float_t>* mvaRes;
1196 mvaRes =
new std::vector <Float_t>(
GetNEvents());
1208 Int_t signalClass = 0;
1209 if (
DataInfo().GetClassInfo(
"Signal") != 0) {
1210 signalClass =
DataInfo().GetClassInfo(
"Signal")->GetNumber();
1213 meanS, meanB, rmsS, rmsB,
xmin,
xmax, signalClass );
1222 TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1223 if (CalcOverlapIntergral) {
1230 if (
DataInfo().IsSignal(ev)) mva_s->
Fill( (*mvaRes)[ievt],
w );
1231 else mva_b->
Fill( (*mvaRes)[ievt],
w );
1233 if (CalcOverlapIntergral) {
1236 mva_s_overlap->
Fill( (*mvaRes)[ievt], w_ov );
1238 mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1250 if (CalcOverlapIntergral) {
1257 Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1258 if (bc_s > 0.0 && bc_b > 0.0)
1262 delete mva_s_overlap;
1263 delete mva_b_overlap;
1285 Log() << kFATAL <<
"dynamic cast to MethodBase* failed" <<
Endl;
1327 Log() << kINFO <<
"<Train> average number of nodes before/after pruning : "
1339 if (methodIndex < 3){
1340 Log() << kDEBUG <<
"No detailed boost monitoring for "
1342 <<
" yet available " <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void w
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Describe directory structure in memory.
virtual Bool_t cd()
Change current directory to "this" directory.
1-D histogram with a double per channel (see TH1 documentation)
1-D histogram with a float per channel (see TH1 documentation)
1-D histogram with an int per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
virtual Int_t GetNbinsX() const
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
2-D histogram with a float per channel (see TH1 documentation)
Service class for 2-D histogram classes.
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
class TMVA::Config::VariablePlotting fVariablePlotting
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
const TString & GetOptions() const
Class that contains all the data information.
void ScaleBoostWeight(Double_t s) const
void SetBoostWeight(Double_t w) const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Double_t GetOriginalWeight() const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Implementation of the GiniIndex as separation criterion.
Interface for all concrete MVA method implementations.
Virtual base class for all MVA methods.
virtual Double_t GetMvaValue(Double_t *errLower=nullptr, Double_t *errUpper=nullptr)=0
void SetSilentFile(Bool_t status)
void SetWeightFileDir(TString fileDir)
set directory of weight file
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
const char * GetName() const
Types::EAnalysisType GetAnalysisType() const
Bool_t IsModelPersistence() const
const TString & GetJobName() const
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
const TString & GetMethodName() const
UInt_t GetNEvents() const
const Event * GetEvent() const
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
DataSetInfo & DataInfo() const
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Bool_t IsSilentFile() const
Types::EMVA GetMethodType() const
void SetSignalReferenceCut(Double_t cut)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
void SetModelPersistence(Bool_t status)
Double_t GetSignalReferenceCut() const
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification
Class for boosting a TMVA method.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Double_t fAdaBoostBeta
ADA boost parameter, default is 1.
TString fBoostedMethodOptions
options
Double_t fBaggedSampleFraction
relative size of bagged sample
Double_t fBoostWeight
the weight used to boost the next classifier
std::vector< TH1 * > fTestSigMVAHist
void SingleTrain()
initialization
UInt_t fBoostNum
Number of times the classifier is boosted.
Bool_t fDetailedMonitoring
produce detailed monitoring histograms (boost-wise)
UInt_t fRandomSeed
seed for random number generator used for bagging
DataSetManager * fDataSetManager
DSMTEST.
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
std::vector< TH1 * > fTestBgdMVAHist
void CreateMVAHistorgrams()
Bool_t fHistoricBoolOption
historic variable, only needed for "CompatibilityOptions"
TTree * fMonitorTree
tree to monitor values during the boosting
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file (dummy implementation here)
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
TString fBoostedMethodTitle
title
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
return boosted MVA response
std::vector< TH1 * > fTrainSigMVAHist
TString fHistoricOption
historic variable, only needed for "CompatibilityOptions"
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
virtual void TestClassification()
initialization
void InitHistos()
initialisation routine
void ProcessOptions()
process user options
TString fBoostType
string specifying the boost type
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Double_t fROC_training
roc integral of last trained method (on training sample)
TString fBoostedMethodName
details of the boosted classifier
std::vector< TH1 * > fBTrainBgdMVAHist
std::vector< TH1 * > fTrainBgdMVAHist
Double_t SingleBoost(MethodBase *method)
std::vector< TH1 * > fBTrainSigMVAHist
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
const Ranking * CreateRanking()
Double_t fMethodError
estimation of the level error of the classifier
TString fTransformString
min and max values for the classifier response
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
std::vector< Float_t > * fMVAvalues
mva values for the last trained method
Bool_t fMonitorBoostedMethod
monitor the MVA response of every classifier
virtual ~MethodBoost(void)
destructor
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
Double_t fOverlap_integral
void GetHelpMessage() const
Get help message text.
Class for categorizing the phase space.
DataSetManager * fDataSetManager
IMethod * GetLastMethod()
MethodBase * fCurrentMethod
std::vector< Double_t > fMethodWeight
MethodBase * GetCurrentMethod()
MethodCompositeBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
std::vector< IMethod * > fMethods
vector of all classifiers
Analysis of Boosted Decision Trees.
Int_t GetNNodesBeforePruning()
static void InhibitOutput()
static void EnableOutput()
PDF wrapper for histograms; uses user-defined spline interpolation.
Double_t GetMVAProbAt(Double_t value)
void AddEvent(Double_t val, Double_t weight, Int_t type)
Ranking for variables in method (implementation)
Class that is the base class for a vector of results.
void Store(TObject *obj, const char *alias=nullptr)
TH2 * GetHist2D(const TString &alias) const
TH1 * GetHist(const TString &alias) const
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Singleton class for Global types used by TMVA.
TString GetMethodName(Types::EMVA method) const
static Types & Instance()
The single instance of "Types" if existing already, or create it (Singleton)
@ kMaxTreeType
also used as temporary storage for trees not yet assigned for testing;training...
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
virtual void Delete(Option_t *option="")
Delete this object.
Random number generator class based on M.
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
void ToLower()
Change string to lower-case.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
A TTree represents a columnar dataset.
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculates a gaussian function with mean and sigma.
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.