178 "Number of times the classifier is boosted" );
181 "Write monitoring histograms for each boosted classifier" );
184 "Produce histograms for detailed boost monitoring" );
194 "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
197 "Type of transform applied to every boosted method linear, log, step" );
204 "Seed for random number generator used for bagging" );
220 "How to set the final weight of the boosted classifiers" );
228 "Type of transform applied to every boosted method linear, log, step" );
242 "Recalculate the classifier MVA Signallike cut at every boost iteration" );
279 results->
Store(
new TH1F(
"ROCIntegral_test",
"ROC integral of single classifier (testing sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegral_test");
280 results->
Store(
new TH1F(
"ROCIntegralBoosted_test",
"ROC integral of boosted method (testing sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegralBoosted_test");
281 results->
Store(
new TH1F(
"ROCIntegral_train",
"ROC integral of single classifier (training sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegral_train");
282 results->
Store(
new TH1F(
"ROCIntegralBoosted_train",
"ROC integral of boosted method (training sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegralBoosted_train");
345 Log() << kDEBUG <<
"CheckSetup: trying to repair things" <<
Endl;
360 if (
Data()->GetNTrainingEvents()==0)
Log() << kFATAL <<
"<Train> Data() has zero events" <<
Endl;
378 if (varTrafoStart >0) {
380 if (varTrafoEnd<varTrafoStart)
400 Log() << kFATAL <<
"uups.. guess the booking of the " <<
fCurrentMethodIdx <<
"-th classifier somehow failed" <<
Endl;
408 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" <<
Endl;
484 if (StopCounter > 0 &&
fBoostType !=
"Bagging") {
487 Log() << kINFO <<
"Error rate has reached 0.5 ("<<
fMethodError<<
"), boosting process stopped at #" <<
fBoostNum <<
" classifier" <<
Endl;
489 Log() << kINFO <<
"The classifier might be too strong to boost with Beta = " <<
fAdaBoostBeta <<
", try reducing it." <<
Endl;
511 TH1F* tmp =
dynamic_cast<TH1F*
>( results->
GetHist(
"ClassifierWeight") );
539 if (
fBoostNum <=0)
Log() << kFATAL <<
"CreateHistograms called before fBoostNum is initialized" <<
Endl;
543 Int_t signalClass = 0;
544 if (
DataInfo().GetClassInfo(
"Signal") != 0) {
545 signalClass =
DataInfo().GetClassInfo(
"Signal")->GetNumber();
548 meanS, meanB, rmsS, rmsB,
xmin,
xmax, signalClass );
571 const Event *ev =
Data()->GetEvent(ievt);
620 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
625 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
644 for (
UInt_t imtd=0;imtd<nloop;imtd++) {
649 if (dir==0)
continue;
694 const Int_t nBins=10001;
697 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
701 if (val>maxMVA) maxMVA=val;
702 if (val<minMVA) minMVA=val;
704 maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
720 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
725 mvaS->Fill(mvaVal,weight);
727 mvaB->
Fill(mvaVal,weight);
763 for (
Int_t ibin=1;ibin<=nBins;ibin++){
774 if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
781 if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
782 else mvaCutOrientation=1;
815 <<
" s2="<<(sTot-sSelCut)
816 <<
" b2="<<(bTot-bSelCut)
817 <<
" s/b(1)=" << sSelCut/bSelCut
818 <<
" s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
819 <<
" index before cut=" << parentIndex
820 <<
" after: left=" << leftIndex
821 <<
" after: right=" << rightIndex
822 <<
" sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
823 <<
" sepGain="<<separationGain
827 <<
" cutOrientation="<<mvaCutOrientation
858 Log() << kFATAL <<
"<Boost> unknown boost option " <<
fBoostType<<
" called" <<
Endl;
869 Log() << kWARNING <<
" AdaBoost called without classifier reference - needed for calculating AdaBoost " <<
Endl;
878 if (discreteAdaBoost) {
919 if (discreteAdaBoost){
921 WrongDetection[ievt]=
kFALSE;
923 WrongDetection[ievt]=
kTRUE;
928 mvaProb = 2*(mvaProb-0.5);
930 if (
DataInfo().IsSignal(ev)) trueType = 1;
932 sumWrong+= w*trueType*mvaProb;
944 Log() << kWARNING <<
"Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " <<
Endl;
947 if (discreteAdaBoost)
967 const Event* ev =
Data()->GetEvent(ievt);
969 if (discreteAdaBoost){
971 if (WrongDetection[ievt] && boostWeight != 0) {
982 mvaProb = 2*(mvaProb-0.5);
986 if (
DataInfo().IsSignal(ev)) trueType = 1;
989 boostfactor =
TMath::Exp(-1*boostWeight*trueType*mvaProb);
997 Double_t normWeight = oldSum/newSum;
1001 const Event* ev =
Data()->GetEvent(ievt);
1011 const Event* ev =
Data()->GetEvent(ievt);
1018 delete[] WrongDetection;
1019 if (MVAProb)
delete MVAProb;
1034 const Event* ev =
Data()->GetEvent(ievt);
1053 Log() <<
"This method combines several classifier of one species in a "<<
Endl;
1054 Log() <<
"single multivariate quantity via the boost algorithm." <<
Endl;
1055 Log() <<
"the output is a weighted sum over all individual classifiers" <<
Endl;
1056 Log() <<
"By default, the AdaBoost method is employed, which gives " <<
Endl;
1057 Log() <<
"events that were misclassified in the previous tree a larger " <<
Endl;
1058 Log() <<
"weight in the training of the following classifier."<<
Endl;
1059 Log() <<
"Optionally, Bagged boosting can also be applied." <<
Endl;
1063 Log() <<
"The most important parameter in the configuration is the "<<
Endl;
1064 Log() <<
"number of boosts applied (Boost_Num) and the choice of boosting"<<
Endl;
1065 Log() <<
"(Boost_Type), which can be set to either AdaBoost or Bagging." <<
Endl;
1066 Log() <<
"AdaBoosting: The most important parameters in this configuration" <<
Endl;
1067 Log() <<
"is the beta parameter (Boost_AdaBoostBeta) " <<
Endl;
1068 Log() <<
"When boosting a linear classifier, it is sometimes advantageous"<<
Endl;
1069 Log() <<
"to transform the MVA output non-linearly. The following options" <<
Endl;
1070 Log() <<
"are available: step, log, and minmax, the default is no transform."<<
Endl;
1072 Log() <<
"Some classifiers are hard to boost and do not improve much in"<<
Endl;
1073 Log() <<
"their performance by boosting them, some even slightly deteriorate"<<
Endl;
1074 Log() <<
"due to the boosting." <<
Endl;
1075 Log() <<
"The booking of the boost method is special since it requires"<<
Endl;
1076 Log() <<
"the booing of the method to be boosted and the boost itself."<<
Endl;
1077 Log() <<
"This is solved by booking the method to be boosted and to add"<<
Endl;
1078 Log() <<
"all Boost parameters, which all begin with \"Boost_\" to the"<<
Endl;
1079 Log() <<
"options string. The factory separates the options and initiates"<<
Endl;
1080 Log() <<
"the boost process. The TMVA macro directory contains the example"<<
Endl;
1081 Log() <<
"macro \"Boost.C\"" <<
Endl;
1104 Double_t sigcut =
m->GetSignalReferenceCut();
1111 if (val < sigcut) val = sigcut;
1116 if (
m->IsSignalLike(val)) val = 1.;
1158 Data()->SetCurrentType(eTT);
1164 if (singleMethod && !method) {
1165 Log() << kFATAL <<
" What do you do? Your method:"
1167 <<
" seems not to be a propper TMVA method"
1177 if (!singleMethod) {
1183 if (AllMethodsWeight != 0.0) {
1191 std::vector <Float_t>* mvaRes;
1195 mvaRes =
new std::vector <Float_t>(
GetNEvents());
1207 Int_t signalClass = 0;
1208 if (
DataInfo().GetClassInfo(
"Signal") != 0) {
1209 signalClass =
DataInfo().GetClassInfo(
"Signal")->GetNumber();
1212 meanS, meanB, rmsS, rmsB,
xmin,
xmax, signalClass );
1221 TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1222 if (CalcOverlapIntergral) {
1229 if (
DataInfo().IsSignal(ev)) mva_s->
Fill( (*mvaRes)[ievt], w );
1230 else mva_b->
Fill( (*mvaRes)[ievt], w );
1232 if (CalcOverlapIntergral) {
1235 mva_s_overlap->
Fill( (*mvaRes)[ievt], w_ov );
1237 mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1249 if (CalcOverlapIntergral) {
1256 Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1257 if (bc_s > 0.0 && bc_b > 0.0)
1261 delete mva_s_overlap;
1262 delete mva_b_overlap;
1284 Log() << kFATAL <<
"dynamic cast to MethodBase* failed" <<
Endl;
1326 Log() << kINFO <<
"<Train> average number of nodes before/after pruning : "
1338 if (methodIndex < 3){
1339 Log() << kDEBUG <<
"No detailed boost monitoring for "
1341 <<
" yet available " <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
int Int_t
Signed integer 4 bytes (int).
int Ssiz_t
String size (currently int).
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
long long Long64_t
Portable signed long integer 8 bytes.
float Float_t
Float 4 bytes (float).
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Describe directory structure in memory.
virtual Bool_t cd()
Change current directory to "this" directory.
1-D histogram with a double per channel (see TH1 documentation)
1-D histogram with a float per channel (see TH1 documentation)
1-D histogram with an int per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
virtual Int_t GetNbinsX() const
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
2-D histogram with a float per channel (see TH1 documentation)
Service class for 2-D histogram classes.
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
Access to the ClassifierFactory singleton; creates the instance if needed.
class TMVA::Config::VariablePlotting fVariablePlotting
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
const TString & GetOptions() const
Class that contains all the data information.
void ScaleBoostWeight(Double_t s) const
void SetBoostWeight(Double_t w) const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Double_t GetOriginalWeight() const
Double_t GetWeight() const
Return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Implementation of the GiniIndex as separation criterion.
Interface for all concrete MVA method implementations.
Virtual base Class for all MVA method.
const char * GetName() const override
void SetSilentFile(Bool_t status)
void SetWeightFileDir(TString fileDir)
set directory of weight file
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Types::EAnalysisType GetAnalysisType() const
Bool_t IsModelPersistence() const
const TString & GetJobName() const
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
const TString & GetMethodName() const
UInt_t GetNEvents() const
const Event * GetEvent() const
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
DataSetInfo & DataInfo() const
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
const Event * fTmpEvent
! temporary event when testing on a different DataSet than the own one
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Double_t GetMvaValue(Double_t *errLower=nullptr, Double_t *errUpper=nullptr) override=0
Bool_t IsSilentFile() const
Types::EMVA GetMethodType() const
void SetSignalReferenceCut(Double_t cut)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
void SetModelPersistence(Bool_t status)
Double_t GetSignalReferenceCut() const
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
Calculate the area (integral) under the ROC curve as an overall quality measure of the classification.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype) override
writes all MVA evaluation histograms to file
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Double_t fAdaBoostBeta
ADA boost parameter, default is 1.
TString fBoostedMethodOptions
options
Double_t fBaggedSampleFraction
Relative size of the bagged sample.
void DeclareCompatibilityOptions() override
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Double_t fBoostWeight
the weight used to boost the next classifier
std::vector< TH1 * > fTestSigMVAHist
void ProcessOptions() override
process user options
void SingleTrain()
initialization
virtual void TestClassification() override
initialization
UInt_t fBoostNum
Number of times the classifier is boosted.
Bool_t fDetailedMonitoring
produce detailed monitoring histograms (boost-wise)
UInt_t fRandomSeed
seed for random number generator used for bagging
DataSetManager * fDataSetManager
DSMTEST.
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
std::vector< TH1 * > fTestBgdMVAHist
void CreateMVAHistorgrams()
Bool_t fHistoricBoolOption
historic variable, only needed for "CompatibilityOptions"
TTree * fMonitorTree
tree to monitor values during the boosting
void DeclareOptions() override
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
TString fBoostedMethodTitle
title
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
std::vector< TH1 * > fTrainSigMVAHist
TString fHistoricOption
historic variable, only needed for "CompatibilityOptions"
void InitHistos()
initialisation routine
void GetHelpMessage() const override
Get help message text.
TString fBoostType
string specifying the boost type
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) override
Boost can handle classification with 2 classes and regression with one regression-target.
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
return boosted MVA response
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
const Ranking * CreateRanking() override
Double_t fROC_training
roc integral of last trained method (on training sample)
TString fBoostedMethodName
details of the boosted classifier
std::vector< TH1 * > fBTrainBgdMVAHist
std::vector< TH1 * > fTrainBgdMVAHist
Double_t SingleBoost(MethodBase *method)
void Train(void) override
std::vector< TH1 * > fBTrainSigMVAHist
Double_t Bagging()
Bagging or Bootstrap boosting: gives a new random Poisson weight to every event.
void WriteMonitoringHistosToFile(void) const override
Write special monitoring histograms to file (dummy implementation here).
Double_t fMethodError
estimation of the level error of the classifier
TString fTransformString
min and max values for the classifier response
std::vector< Float_t > * fMVAvalues
mva values for the last trained method
Bool_t fMonitorBoostedMethod
monitor the MVA response of every classifier
virtual ~MethodBoost(void)
destructor
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
Double_t fOverlap_integral
void CheckSetup() override
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Class for categorizing the phase space.
DataSetManager * fDataSetManager
IMethod * GetLastMethod()
MethodBase * fCurrentMethod
std::vector< Double_t > fMethodWeight
MethodBase * GetCurrentMethod()
MethodCompositeBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
std::vector< IMethod * > fMethods
vector of all classifiers
Analysis of Boosted Decision Trees.
Int_t GetNNodesBeforePruning()
static void InhibitOutput()
static void EnableOutput()
PDF wrapper for histograms; uses user-defined spline interpolation.
Double_t GetMVAProbAt(Double_t value)
void AddEvent(Double_t val, Double_t weight, Int_t type)
Ranking for variables in method (implementation).
Class that is the base class for a vector of results.
void Store(TObject *obj, const char *alias=nullptr)
TH2 * GetHist2D(const TString &alias) const
TH1 * GetHist(const TString &alias) const
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Singleton class for Global types used by TMVA.
TString GetMethodName(Types::EMVA method) const
static Types & Instance()
Return the single instance of "Types" if it already exists, or create it (singleton).
@ kMaxTreeType
also used as temporary storage for trees not yet assigned for testing;training...
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
virtual void Delete(Option_t *option="")
Delete this object.
Random number generator class based on M.
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
void ToLower()
Change string to lower-case.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
A TTree represents a columnar dataset.
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculates a gaussian function with mean and sigma.
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.