100 , fDetailedMonitoring(
kFALSE)
103 , fBaggedSampleFraction(0)
104 , fBoostedMethodTitle(methodTitle)
105 , fBoostedMethodOptions(theOption)
106 , fMonitorBoostedMethod(
kFALSE)
111 , fOverlap_integral(0.0)
183 "Number of times the classifier is boosted" );
186 "Write monitoring histograms for each boosted classifier" );
189 "Produce histograms for detailed boost monitoring" );
199 "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
202 "Type of transform applied to every boosted method linear, log, step" );
209 "Seed for random number generator used for bagging" );
225 "How to set the final weight of the boosted classifiers" );
233 "Type of transform applied to every boosted method linear, log, step" );
247 "Recalculate the classifier MVA Signallike cut at every boost iteration" );
284 results->
Store(
new TH1F(
"ROCIntegral_test",
"ROC integral of single classifier (testing sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegral_test");
285 results->
Store(
new TH1F(
"ROCIntegralBoosted_test",
"ROC integral of boosted method (testing sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegralBoosted_test");
286 results->
Store(
new TH1F(
"ROCIntegral_train",
"ROC integral of single classifier (training sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegral_train");
287 results->
Store(
new TH1F(
"ROCIntegralBoosted_train",
"ROC integral of boosted method (training sample)",
fBoostNum,0,
fBoostNum),
"ROCIntegralBoosted_train");
350 Log() << kDEBUG <<
"CheckSetup: trying to repair things" <<
Endl;
365 if (
Data()->GetNTrainingEvents()==0)
Log() << kFATAL <<
"<Train> Data() has zero events" <<
Endl;
383 if (varTrafoStart >0) {
385 if (varTrafoEnd<varTrafoStart)
405 Log() << kFATAL <<
"uups.. guess the booking of the " <<
fCurrentMethodIdx <<
"-th classifier somehow failed" <<
Endl;
413 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" <<
Endl;
489 if (StopCounter > 0 &&
fBoostType !=
"Bagging") {
492 Log() << kINFO <<
"Error rate has reached 0.5 ("<<
fMethodError<<
"), boosting process stopped at #" << fBoostNum <<
" classifier" <<
Endl;
494 Log() << kINFO <<
"The classifier might be too strong to boost with Beta = " <<
fAdaBoostBeta <<
", try reducing it." <<
Endl;
516 TH1F* tmp =
dynamic_cast<TH1F*
>( results->
GetHist(
"ClassifierWeight") );
544 if (
fBoostNum <=0)
Log() << kFATAL <<
"CreateHistograms called before fBoostNum is initialized" <<
Endl;
548 Int_t signalClass = 0;
549 if (
DataInfo().GetClassInfo(
"Signal") != 0) {
553 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
625 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
630 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
649 for (
UInt_t imtd=0;imtd<nloop;imtd++) {
654 if (dir==0)
continue;
699 const Int_t nBins=10001;
706 if (val>maxMVA) maxMVA=val;
707 if (val<minMVA) minMVA=val;
709 maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
733 mvaS->
Fill(mvaVal,weight);
735 mvaB->
Fill(mvaVal,weight);
771 for (
Int_t ibin=1;ibin<=nBins;ibin++){
782 if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
789 if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
790 else mvaCutOrientation=1;
823 <<
" s2="<<(sTot-sSelCut)
824 <<
" b2="<<(bTot-bSelCut)
825 <<
" s/b(1)=" << sSelCut/bSelCut
826 <<
" s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
827 <<
" index before cut=" << parentIndex
828 <<
" after: left=" << leftIndex
829 <<
" after: right=" << rightIndex
830 <<
" sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
831 <<
" sepGain="<<separationGain
835 <<
" cutOrientation="<<mvaCutOrientation
866 Log() << kFATAL <<
"<Boost> unknown boost option " <<
fBoostType<<
" called" <<
Endl;
877 Log() << kWARNING <<
" AdaBoost called without classifier reference - needed for calculating AdaBoost " <<
Endl;
886 if (discreteAdaBoost) {
927 if (discreteAdaBoost){
929 WrongDetection[ievt]=
kFALSE;
931 WrongDetection[ievt]=
kTRUE;
936 mvaProb = 2*(mvaProb-0.5);
940 sumWrong+= w*trueType*mvaProb;
952 Log() << kWARNING <<
"Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " <<
Endl;
955 if (discreteAdaBoost)
977 if (discreteAdaBoost){
979 if (WrongDetection[ievt] && boostWeight != 0) {
990 mvaProb = 2*(mvaProb-0.5);
997 boostfactor =
TMath::Exp(-1*boostWeight*trueType*mvaProb);
1005 Double_t normWeight = oldSum/newSum;
1026 delete[] WrongDetection;
1027 if (MVAProb)
delete MVAProb;
1061 Log() <<
"This method combines several classifier of one species in a "<<
Endl;
1062 Log() <<
"single multivariate quantity via the boost algorithm." <<
Endl;
1063 Log() <<
"the output is a weighted sum over all individual classifiers" <<
Endl;
1064 Log() <<
"By default, the AdaBoost method is employed, which gives " <<
Endl;
1065 Log() <<
"events that were misclassified in the previous tree a larger " <<
Endl;
1066 Log() <<
"weight in the training of the following classifier."<<
Endl;
1067 Log() <<
"Optionally, Bagged boosting can also be applied." <<
Endl;
1071 Log() <<
"The most important parameter in the configuration is the "<<
Endl;
1072 Log() <<
"number of boosts applied (Boost_Num) and the choice of boosting"<<
Endl;
1073 Log() <<
"(Boost_Type), which can be set to either AdaBoost or Bagging." <<
Endl;
1074 Log() <<
"AdaBoosting: The most important parameters in this configuration" <<
Endl;
1075 Log() <<
"is the beta parameter (Boost_AdaBoostBeta) " <<
Endl;
1076 Log() <<
"When boosting a linear classifier, it is sometimes advantageous"<<
Endl;
1077 Log() <<
"to transform the MVA output non-linearly. The following options" <<
Endl;
1078 Log() <<
"are available: step, log, and minmax, the default is no transform."<<
Endl;
1080 Log() <<
"Some classifiers are hard to boost and do not improve much in"<<
Endl;
1081 Log() <<
"their performance by boosting them, some even slightly deteriorate"<<
Endl;
1082 Log() <<
"due to the boosting." <<
Endl;
1083 Log() <<
"The booking of the boost method is special since it requires"<<
Endl;
1084 Log() <<
"the booing of the method to be boosted and the boost itself."<<
Endl;
1085 Log() <<
"This is solved by booking the method to be boosted and to add"<<
Endl;
1086 Log() <<
"all Boost parameters, which all begin with \"Boost_\" to the"<<
Endl;
1087 Log() <<
"options string. The factory separates the options and initiates"<<
Endl;
1088 Log() <<
"the boost process. The TMVA macro directory contains the example"<<
Endl;
1089 Log() <<
"macro \"Boost.C\"" <<
Endl;
1119 if (val < sigcut) val = sigcut;
1134 norm +=fMethodWeight[i];
1172 if (singleMethod && !method) {
1173 Log() << kFATAL <<
" What do you do? Your method:" 1175 <<
" seems not to be a propper TMVA method" 1185 if (!singleMethod) {
1191 if (AllMethodsWeight != 0.0) {
1199 std::vector <Float_t>* mvaRes;
1203 mvaRes =
new std::vector <Float_t>(
GetNEvents());
1215 Int_t signalClass = 0;
1216 if (
DataInfo().GetClassInfo(
"Signal") != 0) {
1220 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1227 TH1* mva_s =
new TH1F(
"MVA_S",
"MVA_S",
fNbins, xmin, xmax );
1228 TH1* mva_b =
new TH1F(
"MVA_B",
"MVA_B",
fNbins, xmin, xmax );
1229 TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1230 if (CalcOverlapIntergral) {
1231 mva_s_overlap =
new TH1F(
"MVA_S_OVERLAP",
"MVA_S_OVERLAP",
fNbins, xmin, xmax );
1232 mva_b_overlap =
new TH1F(
"MVA_B_OVERLAP",
"MVA_B_OVERLAP",
fNbins, xmin, xmax );
1238 else mva_b->
Fill( (*mvaRes)[ievt], w );
1240 if (CalcOverlapIntergral) {
1243 mva_s_overlap->
Fill( (*mvaRes)[ievt], w_ov );
1245 mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1257 if (CalcOverlapIntergral) {
1264 Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1265 if (bc_s > 0.0 && bc_b > 0.0)
1269 delete mva_s_overlap;
1270 delete mva_b_overlap;
1292 Log() << kFATAL <<
"dynamic cast to MethodBase* failed" <<
Endl;
1334 Log() << kINFO <<
"<Train> average number of nodes before/after pruning : " 1346 if (methodIndex < 3){
1347 Log() << kDEBUG <<
"No detailed boost monitoring for " 1349 <<
" yet available " <<
Endl;
1360 results->
Store(
new TH2F(
Form(
"EventDistSig_%d",methodIndex),
Form(
"EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1362 results->
Store(
new TH2F(
Form(
"EventDistBkg_%d",methodIndex),
Form(
"EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1374 else h=results->
GetHist2D(
Form(
"EventDistBkg_%d",methodIndex));
1375 if (h) h->
Fill(v0,v1,w);
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
void SetModelPersistence(Bool_t status)
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
void SetMsgType(EMsgType t)
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
static long int sum(long int i)
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Random number generator class based on M.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
THist< 1, int, THistStatContent > TH1I
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
MsgLogger & Endl(MsgLogger &ml)
Singleton class for Global types used by TMVA.
void SingleTrain()
initialization
TString GetMethodName(Types::EMVA method) const
std::vector< TH1 *> fTestSigMVAHist
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file dummy implementation here --------------— ...
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual Int_t Fill()
Fill all branches.
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Bool_t fDetailedMonitoring
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Virtual base Class for all MVA method.
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
1-D histogram with a float per channel (see TH1 documentation)}
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Ranking for variables in method (implementation)
Short_t Min(Short_t a, Short_t b)
void ToLower()
Change string to lower-case.
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
std::vector< TH1 *> fTrainBgdMVAHist
const Ranking * CreateRanking()
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
void SetSilentFile(Bool_t status)
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
void SetMethodDir(TDirectory *methodDir)
Double_t fOverlap_integral
static void InhibitOutput()
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
void AddEvent(Double_t val, Double_t weight, Int_t type)
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
const Event * GetEvent() const
std::vector< Double_t > fMethodWeight
Virtual base class for combining several TMVA method.
virtual ~MethodBoost(void)
destructor
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
DataSetInfo & DataInfo() const
Class that contains all the data information.
PDF wrapper for histograms; uses user-defined spline interpolation.
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Class for boosting a TMVA method.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
virtual void Delete(Option_t *option="")
Delete this object.
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
RooCmdArg Timer(Bool_t flag=kTRUE)
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Service class for 2-Dim histogram classes.
const char * GetName() const
ClassInfo * GetClassInfo(Int_t clNum) const
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Double_t fBaggedSampleFraction
Implementation of the GiniIndex as separation criterion.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
void ScaleBoostWeight(Double_t s) const
const TString & GetJobName() const
const TString & GetMethodName() const
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
1-D histogram with a double per channel (see TH1 documentation)}
virtual TDirectory * GetDirectory(const char *apath, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory named "apath".
IMethod * GetLastMethod()
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
MethodBase * fCurrentMethod
UInt_t GetNVariables() const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Class for categorizing the phase space.
TString & Remove(Ssiz_t pos)
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString fBoostedMethodOptions
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Bool_t fMonitorBoostedMethod
void RerouteTransformationHandler(TransformationHandler *fTargetTransformation)
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Describe directory structure in memory.
std::vector< TH1 *> fTrainSigMVAHist
TString fBoostedMethodTitle
TH1 * GetHist(const TString &alias) const
void SetBoostWeight(Double_t w) const
void SetCurrentType(Types::ETreeType type) const
void AddPreDefVal(const T &)
void GetHelpMessage() const
Get help message text.
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file dummy implementation here --------------— ...
MethodBase * GetCurrentMethod()
Int_t GetNNodesBeforePruning()
virtual const char * GetName() const
Returns name of object.
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
const TString & GetOptions() const
virtual void TestClassification()
initialization
Interface for all concrete MVA method implementations.
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
TString fBoostedMethodName
#define REGISTER_METHOD(CLASS)
for example
std::vector< IMethod * > fMethods
Abstract ClassifierFactory template that handles arbitrary types.
Double_t GetMVAProbAt(Double_t value)
TH2 * GetHist2D(const TString &alias) const
DataSetManager * fDataSetManager
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Class that is the base-class for a vector of result.
Short_t Max(Short_t a, Short_t b)
void SetWeightFileDir(TString fileDir)
set directory of weight file
Double_t GetOriginalWeight() const
Bool_t fHistoricBoolOption
void InitHistos()
initialisation routine
Double_t GetSignalReferenceCut() const
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Bool_t IsSignal(const Event *ev) const
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Types::EAnalysisType GetAnalysisType() const
A TTree object has a header with a name and a title.
void Store(TObject *obj, const char *alias=0)
virtual Int_t GetNbinsX() const
std::vector< TH1 *> fBTrainSigMVAHist
static void EnableOutput()
Int_t Fill(Double_t)
Invalid Fill method.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
std::vector< TH1 *> fBTrainBgdMVAHist
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
Types::EMVA GetMethodType() const
Timing information for training and evaluation of MVA methods.
virtual void TestClassification()
initialization
const Event * GetEvent() const
virtual void SetAnalysisType(Types::EAnalysisType type)
std::vector< TH1 *> fTestBgdMVAHist
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Analysis of Boosted Decision Trees.
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void SetSignalReferenceCut(Double_t cut)
const char * Data() const
Bool_t IsModelPersistence()