52 #include "TDirectory.h"
81 TMVA::MethodBoost::MethodBoost( const
TString& jobName,
86 TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption, theTargetDir )
88 , fDetailedMonitoring(
kFALSE)
91 , fBaggedSampleFraction(0)
92 , fBoostedMethodTitle(methodTitle)
93 , fBoostedMethodOptions(theOption)
94 , fMonitorBoostedMethod(kFALSE)
99 , fOverlap_integral(0.0)
102 fMVAvalues =
new std::vector<Float_t>;
112 , fDetailedMonitoring(
kFALSE)
115 , fBaggedSampleFraction(0)
116 , fBoostedMethodTitle(
"")
117 , fBoostedMethodOptions(
"")
118 , fMonitorBoostedMethod(
kFALSE)
123 , fOverlap_integral(0.0)
134 fMethodWeight.clear();
138 fTrainSigMVAHist.clear();
139 fTrainBgdMVAHist.clear();
140 fBTrainSigMVAHist.clear();
141 fBTrainBgdMVAHist.clear();
142 fTestSigMVAHist.clear();
143 fTestBgdMVAHist.clear();
167 DeclareOptionRef( fBoostNum = 1,
"Boost_Num",
168 "Number of times the classifier is boosted" );
170 DeclareOptionRef( fMonitorBoostedMethod =
kTRUE,
"Boost_MonitorMethod",
171 "Write monitoring histograms for each boosted classifier" );
173 DeclareOptionRef( fDetailedMonitoring =
kFALSE,
"Boost_DetailedMonitoring",
174 "Produce histograms for detailed boost monitoring" );
176 DeclareOptionRef( fBoostType =
"AdaBoost",
"Boost_Type",
"Boosting type for the classifiers" );
177 AddPreDefVal(
TString(
"RealAdaBoost"));
178 AddPreDefVal(
TString(
"AdaBoost"));
179 AddPreDefVal(
TString(
"Bagging"));
181 DeclareOptionRef(fBaggedSampleFraction=.6,
"Boost_BaggedSampleFraction",
"Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
183 DeclareOptionRef( fAdaBoostBeta = 1.0,
"Boost_AdaBoostBeta",
184 "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
186 DeclareOptionRef( fTransformString =
"step",
"Boost_Transform",
187 "Type of transform applied to every boosted method linear, log, step" );
189 AddPreDefVal(
TString(
"linear"));
191 AddPreDefVal(
TString(
"gauss"));
193 DeclareOptionRef( fRandomSeed = 0,
"Boost_RandomSeed",
194 "Seed for random number generator used for bagging" );
209 DeclareOptionRef( fHistoricOption =
"ByError",
"Boost_MethodWeightType",
210 "How to set the final weight of the boosted classifiers" );
211 AddPreDefVal(
TString(
"ByError"));
212 AddPreDefVal(
TString(
"Average"));
213 AddPreDefVal(
TString(
"ByROC"));
214 AddPreDefVal(
TString(
"ByOverlap"));
215 AddPreDefVal(
TString(
"LastMethod"));
217 DeclareOptionRef( fHistoricOption =
"step",
"Boost_Transform",
218 "Type of transform applied to every boosted method linear, log, step" );
220 AddPreDefVal(
TString(
"linear"));
222 AddPreDefVal(
TString(
"gauss"));
227 AddPreDefVal(
TString(
"HighEdgeGauss"));
228 AddPreDefVal(
TString(
"HighEdgeCoPara"));
231 DeclareOptionRef( fHistoricBoolOption,
"Boost_RecalculateMVACut",
232 "Recalculate the classifier MVA Signallike cut at every boost iteration" );
241 fBoostedMethodTitle = methodTitle;
242 fBoostedMethodOptions = theOption;
264 results->
Store(
new TH1F(
"MethodWeight",
"Normalized Classifier Weight",fBoostNum,0,fBoostNum),
"ClassifierWeight");
265 results->
Store(
new TH1F(
"BoostWeight",
"Boost Weight",fBoostNum,0,fBoostNum),
"BoostWeight");
266 results->
Store(
new TH1F(
"ErrFraction",
"Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),
"ErrorFraction");
267 if (fDetailedMonitoring){
268 results->
Store(
new TH1F(
"ROCIntegral_test",
"ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),
"ROCIntegral_test");
269 results->
Store(
new TH1F(
"ROCIntegralBoosted_test",
"ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),
"ROCIntegralBoosted_test");
270 results->
Store(
new TH1F(
"ROCIntegral_train",
"ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),
"ROCIntegral_train");
271 results->
Store(
new TH1F(
"ROCIntegralBoosted_train",
"ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),
"ROCIntegralBoosted_train");
272 results->
Store(
new TH1F(
"OverlapIntegal_train",
"Overlap integral (training sample)",fBoostNum,0,fBoostNum),
"Overlap");
282 if (fDetailedMonitoring){
295 results->
Store(
new TH1F(
"SoverBtotal",
"S/B in reweighted training sample",fBoostNum,0,fBoostNum),
"SoverBtotal");
299 results->
Store(
new TH1F(
"SeparationGain",
"SeparationGain",fBoostNum,0,fBoostNum),
"SeparationGain");
305 fMonitorTree=
new TTree(
"MonitorBoost",
"Boost variables");
306 fMonitorTree->Branch(
"iMethod",&fCurrentMethodIdx,
"iMethod/I");
307 fMonitorTree->Branch(
"boostWeight",&fBoostWeight,
"boostWeight/D");
308 fMonitorTree->Branch(
"errorFraction",&fMethodError,
"errorFraction/D");
309 fMonitorBoostedMethod =
kTRUE;
318 Log() <<
kDEBUG <<
"CheckSetup: fBoostType="<<fBoostType <<
Endl;
319 Log() <<
kDEBUG <<
"CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<
Endl;
320 Log() <<
kDEBUG <<
"CheckSetup: fBoostWeight="<<fBoostWeight<<
Endl;
321 Log() <<
kDEBUG <<
"CheckSetup: fMethodError="<<fMethodError<<
Endl;
322 Log() <<
kDEBUG <<
"CheckSetup: fBoostNum="<<fBoostNum <<
Endl;
323 Log() <<
kDEBUG <<
"CheckSetup: fRandomSeed=" << fRandomSeed<<
Endl;
324 Log() <<
kDEBUG <<
"CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<
Endl;
325 Log() <<
kDEBUG <<
"CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<
Endl;
326 Log() <<
kDEBUG <<
"CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod?
"true" :
"false") << Endl;
327 Log() <<
kDEBUG <<
"CheckSetup: MName=" << fBoostedMethodName <<
" Title="<< fBoostedMethodTitle<<
Endl;
328 Log() <<
kDEBUG <<
"CheckSetup: MOptions="<< fBoostedMethodOptions <<
Endl;
329 Log() <<
kDEBUG <<
"CheckSetup: fMonitorTree=" << fMonitorTree <<
Endl;
330 Log() <<
kDEBUG <<
"CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx <<
Endl;
331 if (fMethods.size()>0)
Log() <<
kDEBUG <<
"CheckSetup: fMethods[0]" <<fMethods[0]<<
Endl;
332 Log() <<
kDEBUG <<
"CheckSetup: fMethodWeight.size()" << fMethodWeight.size() <<
Endl;
333 if (fMethodWeight.size()>0)
Log() <<
kDEBUG <<
"CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<
Endl;
334 Log() <<
kDEBUG <<
"CheckSetup: trying to repair things" <<
Endl;
349 if (
Data()->GetNTrainingEvents()==0)
Log() <<
kFATAL <<
"<Train> Data() has zero events" <<
Endl;
352 if (fMethods.size() > 0) fMethods.clear();
353 fMVAvalues->resize(
Data()->GetNTrainingEvents(), 0.0);
355 Log() <<
kINFO <<
"Training "<< fBoostNum <<
" " << fBoostedMethodName <<
" with title " << fBoostedMethodTitle <<
" Classifiers ... patience please" <<
Endl;
366 Ssiz_t varTrafoStart=fBoostedMethodOptions.Index(
"~VarTransform=");
367 if (varTrafoStart >0) {
368 Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(
":",varTrafoStart);
369 if (varTrafoEnd<varTrafoStart)
370 varTrafoEnd=fBoostedMethodOptions.Length();
371 fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
376 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
382 Form(
"%s_B%04i", fBoostedMethodTitle.Data(),fCurrentMethodIdx),
384 fBoostedMethodOptions);
388 fCurrentMethod = (
dynamic_cast<MethodBase*
>(method));
390 if (fCurrentMethod==0) {
391 Log() <<
kFATAL <<
"uups.. guess the booking of the " << fCurrentMethodIdx <<
"-th classifier somehow failed" <<
Endl;
399 Log() <<
kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" <<
Endl;
403 fCurrentMethod->SetMsgType(
kWARNING);
404 fCurrentMethod->SetupMethod();
405 fCurrentMethod->ParseOptions();
407 fCurrentMethod->SetAnalysisType( GetAnalysisType() );
408 fCurrentMethod->ProcessSetup();
409 fCurrentMethod->CheckSetup();
413 fCurrentMethod->RerouteTransformationHandler (&(this->GetTransformationHandler()));
417 if (fMonitorBoostedMethod) {
418 methodDir=MethodBaseDir()->
GetDirectory(dirName=
Form(
"%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
420 methodDir=BaseDir()->
mkdir(dirName,dirTitle=
Form(
"Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
422 fCurrentMethod->SetMethodDir(methodDir);
423 fCurrentMethod->BaseDir()->
cd();
432 if (fBoostType==
"Bagging") Bagging();
435 fCurrentMethod->WriteMonitoringHistosToFile();
441 if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
449 SingleBoost(fCurrentMethod);
455 if (fDetailedMonitoring) {
466 fMonitorTree->Fill();
470 Log() <<
kDEBUG <<
"AdaBoost (methodErr) err = " << fMethodError <<
Endl;
471 if (fMethodError > 0.49999) StopCounter++;
472 if (StopCounter > 0 && fBoostType !=
"Bagging") {
474 fBoostNum = fCurrentMethodIdx+1;
475 Log() <<
kINFO <<
"Error rate has reached 0.5 ("<< fMethodError<<
"), boosting process stopped at #" << fBoostNum <<
" classifier" <<
Endl;
477 Log() <<
kINFO <<
"The classifier might be too strong to boost with Beta = " << fAdaBoostBeta <<
", try reducing it." <<
Endl;
487 Timer* timer1=
new Timer( fBoostNum, GetName() );
489 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
494 if (fCurrentMethodIdx==fBoostNum) {
499 TH1F* tmp =
dynamic_cast<TH1F*
>( results->
GetHist(
"ClassifierWeight") );
500 if (tmp) tmp->
SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);
509 if (fMethods.size()==1) fMethodWeight[0] = 1.0;
520 fBoostedMethodOptions=GetOptions();
527 if (fBoostNum <=0)
Log() <<
kFATAL <<
"CreateHistorgrams called before fBoostNum is initialized" <<
Endl;
531 Int_t signalClass = 0;
532 if (DataInfo().GetClassInfo(
"Signal") != 0) {
533 signalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
536 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
543 for (
UInt_t imtd=0; imtd<fBoostNum; imtd++) {
544 fTrainSigMVAHist .push_back(
new TH1F(
Form(
"MVA_Train_S_%04i",imtd),
"MVA_Train_S", fNbins, xmin, xmax ) );
545 fTrainBgdMVAHist .push_back(
new TH1F(
Form(
"MVA_Train_B%04i", imtd),
"MVA_Train_B", fNbins, xmin, xmax ) );
546 fBTrainSigMVAHist.push_back(
new TH1F(
Form(
"MVA_BTrain_S%04i",imtd),
"MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
547 fBTrainBgdMVAHist.push_back(
new TH1F(
Form(
"MVA_BTrain_B%04i",imtd),
"MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
548 fTestSigMVAHist .push_back(
new TH1F(
Form(
"MVA_Test_S%04i", imtd),
"MVA_Test_S", fNbins, xmin, xmax ) );
549 fTestBgdMVAHist .push_back(
new TH1F(
Form(
"MVA_Test_B%04i", imtd),
"MVA_Test_B", fNbins, xmin, xmax ) );
558 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
559 const Event *ev =
Data()->GetEvent(ievt);
569 if (fMonitorBoostedMethod) {
570 for (
UInt_t imtd=0;imtd<fBoostNum;imtd++) {
577 fTrainSigMVAHist[imtd]->SetDirectory(dir);
578 fTrainSigMVAHist[imtd]->Write();
579 fTrainBgdMVAHist[imtd]->SetDirectory(dir);
580 fTrainBgdMVAHist[imtd]->Write();
581 fBTrainSigMVAHist[imtd]->SetDirectory(dir);
582 fBTrainSigMVAHist[imtd]->Write();
583 fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
584 fBTrainBgdMVAHist[imtd]->Write();
591 fMonitorTree->Write();
599 if (fMonitorBoostedMethod) {
600 UInt_t nloop = fTestSigMVAHist.size();
601 if (fMethods.size()<nloop) nloop = fMethods.size();
604 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
605 const Event* ev = GetEvent(ievt);
607 if (DataInfo().IsSignal(ev)) {
608 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
609 fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
613 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
614 fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
628 UInt_t nloop = fTestSigMVAHist.size();
629 if (fMethods.size()<nloop) nloop = fMethods.size();
630 if (fMonitorBoostedMethod) {
632 for (
UInt_t imtd=0;imtd<nloop;imtd++) {
637 if (dir==0)
continue;
639 fTestSigMVAHist[imtd]->SetDirectory(dir);
640 fTestSigMVAHist[imtd]->Write();
641 fTestBgdMVAHist[imtd]->SetDirectory(dir);
642 fTestBgdMVAHist[imtd]->Write();
673 const Int_t nBins=10001;
676 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
680 if (val>maxMVA) maxMVA=val;
681 if (val<minMVA) minMVA=val;
683 maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
687 TH1D *mvaS =
new TH1D(
Form(
"MVAS_%d",fCurrentMethodIdx) ,
"",nBins,minMVA,maxMVA);
688 TH1D *mvaB =
new TH1D(
Form(
"MVAB_%d",fCurrentMethodIdx) ,
"",nBins,minMVA,maxMVA);
689 TH1D *mvaSC =
new TH1D(
Form(
"MVASC_%d",fCurrentMethodIdx),
"",nBins,minMVA,maxMVA);
690 TH1D *mvaBC =
new TH1D(
Form(
"MVABC_%d",fCurrentMethodIdx),
"",nBins,minMVA,maxMVA);
694 if (fDetailedMonitoring){
695 results->
Store(mvaS,
Form(
"MVAS_%d",fCurrentMethodIdx));
696 results->
Store(mvaB,
Form(
"MVAB_%d",fCurrentMethodIdx));
697 results->
Store(mvaSC,
Form(
"MVASC_%d",fCurrentMethodIdx));
698 results->
Store(mvaBC,
Form(
"MVABC_%d",fCurrentMethodIdx));
701 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
703 Double_t weight = GetEvent(ievt)->GetWeight();
706 if (DataInfo().IsSignal(GetEvent(ievt))){
707 mvaS->
Fill(mvaVal,weight);
709 mvaB->
Fill(mvaVal,weight);
743 for (
Int_t ibin=1;ibin<=nBins;ibin++){
754 if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
761 if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
762 else mvaCutOrientation=1;
795 <<
" s2="<<(sTot-sSelCut)
796 <<
" b2="<<(bTot-bSelCut)
797 <<
" s/b(1)=" << sSelCut/bSelCut
798 <<
" s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
799 <<
" index before cut=" << parentIndex
800 <<
" after: left=" << leftIndex
801 <<
" after: right=" << rightIndex
802 <<
" sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
803 <<
" sepGain="<<separationGain
806 <<
" idx="<<fCurrentMethodIdx
807 <<
" cutOrientation="<<mvaCutOrientation
831 if (fBoostType==
"AdaBoost") returnVal = this->AdaBoost (method,1);
832 else if (fBoostType==
"RealAdaBoost") returnVal = this->AdaBoost (method,0);
833 else if (fBoostType==
"Bagging") returnVal = this->Bagging ();
835 Log() <<
kFATAL <<
"<Boost> unknown boost option " << fBoostType<<
" called" <<
Endl;
837 fMethodWeight.push_back(returnVal);
846 Log() <<
kWARNING <<
" AdaBoost called without classifier reference - needed for calulating AdaBoost " <<
Endl;
855 if (discreteAdaBoost) {
866 for (
Long64_t evt=0; evt<GetNEvents(); evt++) {
873 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=
kTRUE;
876 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
877 const Event* ev = GetEvent(ievt);
878 sig=DataInfo().IsSignal(ev);
879 v = fMVAvalues->at(ievt);
882 if (fMonitorBoostedMethod) {
884 fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
888 fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
893 if (discreteAdaBoost){
895 WrongDetection[ievt]=
kFALSE;
897 WrongDetection[ievt]=
kTRUE;
902 mvaProb = 2*(mvaProb-0.5);
904 if (DataInfo().IsSignal(ev)) trueType = 1;
906 sumWrong+= w*trueType*mvaProb;
910 fMethodError=sumWrong/sumAll;
917 if (fMethodError == 0) {
918 Log() <<
kWARNING <<
"Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " <<
Endl;
921 if (discreteAdaBoost)
922 boostWeight =
TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
924 boostWeight =
TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
940 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
941 const Event* ev =
Data()->GetEvent(ievt);
943 if (discreteAdaBoost){
945 if (WrongDetection[ievt] && boostWeight != 0) {
956 mvaProb = 2*(mvaProb-0.5);
960 if (DataInfo().IsSignal(ev)) trueType = 1;
963 boostfactor =
TMath::Exp(-1*boostWeight*trueType*mvaProb);
971 Double_t normWeight = oldSum/newSum;
974 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
975 const Event* ev =
Data()->GetEvent(ievt);
984 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
985 const Event* ev =
Data()->GetEvent(ievt);
992 delete[] WrongDetection;
993 if (MVAProb)
delete MVAProb;
995 fBoostWeight = boostWeight;
1007 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1008 const Event* ev =
Data()->GetEvent(ievt);
1027 Log() <<
"This method combines several classifier of one species in a "<<
Endl;
1028 Log() <<
"single multivariate quantity via the boost algorithm." <<
Endl;
1029 Log() <<
"the output is a weighted sum over all individual classifiers" <<
Endl;
1030 Log() <<
"By default, the AdaBoost method is employed, which gives " <<
Endl;
1031 Log() <<
"events that were misclassified in the previous tree a larger " <<
Endl;
1032 Log() <<
"weight in the training of the following classifier."<<
Endl;
1033 Log() <<
"Optionally, Bagged boosting can also be applied." <<
Endl;
1037 Log() <<
"The most important parameter in the configuration is the "<<
Endl;
1038 Log() <<
"number of boosts applied (Boost_Num) and the choice of boosting"<<
Endl;
1039 Log() <<
"(Boost_Type), which can be set to either AdaBoost or Bagging." <<
Endl;
1040 Log() <<
"AdaBoosting: The most important parameters in this configuration" <<
Endl;
1041 Log() <<
"is the beta parameter (Boost_AdaBoostBeta) " <<
Endl;
1042 Log() <<
"When boosting a linear classifier, it is sometimes advantageous"<<
Endl;
1043 Log() <<
"to transform the MVA output non-linearly. The following options" <<
Endl;
1044 Log() <<
"are available: step, log, and minmax, the default is no transform."<<
Endl;
1046 Log() <<
"Some classifiers are hard to boost and do not improve much in"<<
Endl;
1047 Log() <<
"their performance by boosting them, some even slightly deteriorate"<<
Endl;
1048 Log() <<
"due to the boosting." <<
Endl;
1049 Log() <<
"The booking of the boost method is special since it requires"<<
Endl;
1050 Log() <<
"the booing of the method to be boosted and the boost itself."<<
Endl;
1051 Log() <<
"This is solved by booking the method to be boosted and to add"<<
Endl;
1052 Log() <<
"all Boost parameters, which all begin with \"Boost_\" to the"<<
Endl;
1053 Log() <<
"options string. The factory separates the options and initiates"<<
Endl;
1054 Log() <<
"the boost process. The TMVA macro directory contains the example"<<
Endl;
1055 Log() <<
"macro \"Boost.C\"" <<
Endl;
1074 for (
UInt_t i=0;i< fMethods.size(); i++){
1081 if (fTransformString ==
"linear"){
1084 else if (fTransformString ==
"log"){
1085 if (val < sigcut) val = sigcut;
1089 else if (fTransformString ==
"step" ){
1093 else if (fTransformString ==
"gauss"){
1097 Log() <<
kFATAL <<
"error unknown transformation " << fTransformString<<
Endl;
1099 mvaValue+=val*fMethodWeight[i];
1100 norm +=fMethodWeight[i];
1105 NoErrorCalc(err, errUpper);
1132 Data()->SetCurrentType(eTT);
1138 if (singleMethod && !method) {
1139 Log() <<
kFATAL <<
" What do you do? Your method:"
1140 << fMethods.back()->GetName()
1141 <<
" seems not to be a propper TMVA method"
1150 std::vector<Double_t> OldMethodWeight(fMethodWeight);
1151 if (!singleMethod) {
1154 for (
UInt_t i=0; i<=fCurrentMethodIdx; i++)
1155 AllMethodsWeight += fMethodWeight.at(i);
1157 if (AllMethodsWeight != 0.0) {
1158 for (
UInt_t i=0; i<=fCurrentMethodIdx; i++)
1159 fMethodWeight[i] /= AllMethodsWeight;
1165 std::vector <Float_t>* mvaRes;
1167 mvaRes = fMVAvalues;
1169 mvaRes =
new std::vector <Float_t>(GetNEvents());
1170 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1172 (*mvaRes)[ievt] = singleMethod ? method->
GetMvaValue(&err) : GetMvaValue(&err);
1178 fMethodWeight = OldMethodWeight;
1181 Int_t signalClass = 0;
1182 if (DataInfo().GetClassInfo(
"Signal") != 0) {
1183 signalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
1186 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1193 TH1* mva_s =
new TH1F(
"MVA_S",
"MVA_S", fNbins, xmin, xmax );
1194 TH1* mva_b =
new TH1F(
"MVA_B",
"MVA_B", fNbins, xmin, xmax );
1195 TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1196 if (CalcOverlapIntergral) {
1197 mva_s_overlap =
new TH1F(
"MVA_S_OVERLAP",
"MVA_S_OVERLAP", fNbins, xmin, xmax );
1198 mva_b_overlap =
new TH1F(
"MVA_B_OVERLAP",
"MVA_B_OVERLAP", fNbins, xmin, xmax );
1200 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1201 const Event* ev = GetEvent(ievt);
1203 if (DataInfo().IsSignal(ev)) mva_s->
Fill( (*mvaRes)[ievt], w );
1204 else mva_b->
Fill( (*mvaRes)[ievt], w );
1206 if (CalcOverlapIntergral) {
1208 if (DataInfo().IsSignal(ev))
1209 mva_s_overlap->
Fill( (*mvaRes)[ievt], w_ov );
1211 mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1223 if (CalcOverlapIntergral) {
1227 fOverlap_integral = 0.0;
1230 Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1231 if (bc_s > 0.0 && bc_b > 0.0)
1235 delete mva_s_overlap;
1236 delete mva_b_overlap;
1258 Log() <<
kFATAL <<
"dynamic cast to MethodBase* failed" <<
Endl;
1262 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1287 results->
Store(
new TH1I(
"NodesBeforePruning",
"nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),
"NodesBeforePruning");
1288 results->
Store(
new TH1I(
"NodesAfterPruning",
"nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),
"NodesAfterPruning");
1301 Log() <<
kINFO <<
"<Train> average number of nodes before/after pruning : "
1313 if (methodIndex < 3){
1314 Log() <<
kINFO <<
"No detailed boost monitoring for "
1315 << GetCurrentMethod(methodIndex)->GetMethodName()
1316 <<
" yet available " <<
Endl;
1324 if (fDetailedMonitoring){
1326 if (DataInfo().GetNVariables() == 2) {
1327 results->
Store(
new TH2F(
Form(
"EventDistSig_%d",methodIndex),
Form(
"EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1329 results->
Store(
new TH2F(
Form(
"EventDistBkg_%d",methodIndex),
Form(
"EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1333 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1334 const Event* ev = GetEvent(ievt);
1340 if (DataInfo().IsSignal(ev)) h=results->
GetHist2D(
Form(
"EventDistSig_%d",methodIndex));
1341 else h=results->
GetHist2D(
Form(
"EventDistBkg_%d",methodIndex));
1342 if (h) h->
Fill(v0,v1,w);
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
Access to the ClassifierFactory singleton; creates the instance if needed.
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Random number generator class based on M.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
MsgLogger & Endl(MsgLogger &ml)
TH1 * GetHist(const TString &alias) const
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
void SingleTrain()
initialization
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
void SetBoostWeight(Double_t w) const
1-D histogram with a float per channel (see TH1 documentation)
Short_t Min(Short_t a, Short_t b)
void ToLower()
Change string to lower-case.
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory and return a pointer to the created directory.
const Ranking * CreateRanking()
virtual Int_t GetNbinsX() const
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
TH2 * GetHist2D(const TString &alias) const
static Types & Instance()
Return the single instance of "Types" if it already exists, or create it (singleton)
static void InhibitOutput()
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=NULL)
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram. Better to use h1.GetXaxis().GetBinLowEdge(bin) ...
void AddEvent(Double_t val, Double_t weight, Int_t type)
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
virtual ~MethodBoost(void)
destructor
void ScaleBoostWeight(Double_t s) const
virtual void SetMarkerColor(Color_t mcolor=1)
std::vector< std::vector< double > > Data
void GetHelpMessage() const
Get help message text.
Types::EMVA GetMethodType() const
Double_t GetOriginalWeight() const
TString GetElapsedTime(Bool_t Scientific=kTRUE)
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
RooCmdArg Timer(Bool_t flag=kTRUE)
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
std::string GetMethodName(TCppMethod_t)
Service class for 2-Dim histogram classes.
TString GetMethodName(Types::EMVA method) const
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
1-D histogram with a double per channel (see TH1 documentation)
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
virtual Double_t GetSeparationGain(const Double_t &nSelS, const Double_t &nSelB, const Double_t &nTotS, const Double_t &nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
void DeclareCompatibilityOptions()
Options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Describe directory structure in memory.
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
Calculate the area (integral) under the ROC curve as an overall quality measure of the classification ...
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
Int_t GetNNodesBeforePruning()
virtual void TestClassification()
initialization
void WriteMonitoringHistosToFile(void) const
Write special monitoring histograms to file (dummy implementation here) ...
#define REGISTER_METHOD(CLASS)
for example
std::vector< IMethod * > fMethods
Abstract ClassifierFactory template that handles arbitrary types.
Double_t GetMVAProbAt(Double_t value)
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
virtual void DeclareCompatibilityOptions()
Options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
virtual Double_t GetSeparationIndex(const Double_t &s, const Double_t &b)=0
Short_t Max(Short_t a, Short_t b)
void InitHistos()
initialisation routine
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using a path.
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
A TTree object has a header with a name and a title.
Double_t GetSignalReferenceCut() const
void Store(TObject *obj, const char *alias=0)
static void EnableOutput()
Int_t Fill(Double_t)
Invalid Fill method.
virtual void SetTitle(const char *title="")
Change (i.e. set) the title of the TNamed.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
double norm(double *x, double *p)
virtual void TestClassification()
initialization
void SetSignalReferenceCut(Double_t cut)
ClassImp(TMVA::MethodBoost) TMVA