86 TMVA::MethodBoost::MethodBoost( const
TString& jobName,
91 TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption, theTargetDir )
93 , fDetailedMonitoring(
kFALSE)
96 , fBaggedSampleFraction(0)
97 , fBoostedMethodTitle(methodTitle)
98 , fBoostedMethodOptions(theOption)
99 , fMonitorBoostedMethod(kFALSE)
104 , fOverlap_integral(0.0)
107 fMVAvalues =
new std::vector<Float_t>;
117 , fDetailedMonitoring(
kFALSE)
120 , fBaggedSampleFraction(0)
121 , fBoostedMethodTitle(
"")
122 , fBoostedMethodOptions(
"")
123 , fMonitorBoostedMethod(
kFALSE)
128 , fOverlap_integral(0.0)
139 fMethodWeight.clear();
143 fTrainSigMVAHist.clear();
144 fTrainBgdMVAHist.clear();
145 fBTrainSigMVAHist.clear();
146 fBTrainBgdMVAHist.clear();
147 fTestSigMVAHist.clear();
148 fTestBgdMVAHist.clear();
172 DeclareOptionRef( fBoostNum = 1,
"Boost_Num",
173 "Number of times the classifier is boosted" );
175 DeclareOptionRef( fMonitorBoostedMethod =
kTRUE,
"Boost_MonitorMethod",
176 "Write monitoring histograms for each boosted classifier" );
178 DeclareOptionRef( fDetailedMonitoring =
kFALSE,
"Boost_DetailedMonitoring",
179 "Produce histograms for detailed boost monitoring" );
181 DeclareOptionRef( fBoostType =
"AdaBoost",
"Boost_Type",
"Boosting type for the classifiers" );
182 AddPreDefVal(
TString(
"RealAdaBoost"));
183 AddPreDefVal(
TString(
"AdaBoost"));
184 AddPreDefVal(
TString(
"Bagging"));
186 DeclareOptionRef(fBaggedSampleFraction=.6,
"Boost_BaggedSampleFraction",
"Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
188 DeclareOptionRef( fAdaBoostBeta = 1.0,
"Boost_AdaBoostBeta",
189 "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
191 DeclareOptionRef( fTransformString =
"step",
"Boost_Transform",
192 "Type of transform applied to every boosted method linear, log, step" );
194 AddPreDefVal(
TString(
"linear"));
196 AddPreDefVal(
TString(
"gauss"));
198 DeclareOptionRef( fRandomSeed = 0,
"Boost_RandomSeed",
199 "Seed for random number generator used for bagging" );
214 DeclareOptionRef( fHistoricOption =
"ByError",
"Boost_MethodWeightType",
215 "How to set the final weight of the boosted classifiers" );
216 AddPreDefVal(
TString(
"ByError"));
217 AddPreDefVal(
TString(
"Average"));
218 AddPreDefVal(
TString(
"ByROC"));
219 AddPreDefVal(
TString(
"ByOverlap"));
220 AddPreDefVal(
TString(
"LastMethod"));
222 DeclareOptionRef( fHistoricOption =
"step",
"Boost_Transform",
223 "Type of transform applied to every boosted method linear, log, step" );
225 AddPreDefVal(
TString(
"linear"));
227 AddPreDefVal(
TString(
"gauss"));
232 AddPreDefVal(
TString(
"HighEdgeGauss"));
233 AddPreDefVal(
TString(
"HighEdgeCoPara"));
236 DeclareOptionRef( fHistoricBoolOption,
"Boost_RecalculateMVACut",
237 "Recalculate the classifier MVA Signallike cut at every boost iteration" );
246 fBoostedMethodTitle = methodTitle;
247 fBoostedMethodOptions = theOption;
269 results->
Store(
new TH1F(
"MethodWeight",
"Normalized Classifier Weight",fBoostNum,0,fBoostNum),
"ClassifierWeight");
270 results->
Store(
new TH1F(
"BoostWeight",
"Boost Weight",fBoostNum,0,fBoostNum),
"BoostWeight");
271 results->
Store(
new TH1F(
"ErrFraction",
"Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),
"ErrorFraction");
272 if (fDetailedMonitoring){
273 results->
Store(
new TH1F(
"ROCIntegral_test",
"ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),
"ROCIntegral_test");
274 results->
Store(
new TH1F(
"ROCIntegralBoosted_test",
"ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),
"ROCIntegralBoosted_test");
275 results->
Store(
new TH1F(
"ROCIntegral_train",
"ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),
"ROCIntegral_train");
276 results->
Store(
new TH1F(
"ROCIntegralBoosted_train",
"ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),
"ROCIntegralBoosted_train");
277 results->
Store(
new TH1F(
"OverlapIntegal_train",
"Overlap integral (training sample)",fBoostNum,0,fBoostNum),
"Overlap");
287 if (fDetailedMonitoring){
300 results->
Store(
new TH1F(
"SoverBtotal",
"S/B in reweighted training sample",fBoostNum,0,fBoostNum),
"SoverBtotal");
304 results->
Store(
new TH1F(
"SeparationGain",
"SeparationGain",fBoostNum,0,fBoostNum),
"SeparationGain");
310 fMonitorTree=
new TTree(
"MonitorBoost",
"Boost variables");
311 fMonitorTree->Branch(
"iMethod",&fCurrentMethodIdx,
"iMethod/I");
312 fMonitorTree->Branch(
"boostWeight",&fBoostWeight,
"boostWeight/D");
313 fMonitorTree->Branch(
"errorFraction",&fMethodError,
"errorFraction/D");
314 fMonitorBoostedMethod =
kTRUE;
323 Log() <<
kDEBUG <<
"CheckSetup: fBoostType="<<fBoostType <<
Endl;
324 Log() <<
kDEBUG <<
"CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<
Endl;
325 Log() <<
kDEBUG <<
"CheckSetup: fBoostWeight="<<fBoostWeight<<
Endl;
326 Log() <<
kDEBUG <<
"CheckSetup: fMethodError="<<fMethodError<<
Endl;
327 Log() <<
kDEBUG <<
"CheckSetup: fBoostNum="<<fBoostNum <<
Endl;
328 Log() <<
kDEBUG <<
"CheckSetup: fRandomSeed=" << fRandomSeed<<
Endl;
329 Log() <<
kDEBUG <<
"CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<
Endl;
330 Log() <<
kDEBUG <<
"CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<
Endl;
331 Log() <<
kDEBUG <<
"CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod?
"true" :
"false") << Endl;
332 Log() <<
kDEBUG <<
"CheckSetup: MName=" << fBoostedMethodName <<
" Title="<< fBoostedMethodTitle<<
Endl;
333 Log() <<
kDEBUG <<
"CheckSetup: MOptions="<< fBoostedMethodOptions <<
Endl;
334 Log() <<
kDEBUG <<
"CheckSetup: fMonitorTree=" << fMonitorTree <<
Endl;
335 Log() <<
kDEBUG <<
"CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx <<
Endl;
336 if (fMethods.size()>0)
Log() <<
kDEBUG <<
"CheckSetup: fMethods[0]" <<fMethods[0]<<
Endl;
337 Log() <<
kDEBUG <<
"CheckSetup: fMethodWeight.size()" << fMethodWeight.size() <<
Endl;
338 if (fMethodWeight.size()>0)
Log() <<
kDEBUG <<
"CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<
Endl;
339 Log() <<
kDEBUG <<
"CheckSetup: trying to repair things" <<
Endl;
354 if (
Data()->GetNTrainingEvents()==0)
Log() <<
kFATAL <<
"<Train> Data() has zero events" <<
Endl;
357 if (fMethods.size() > 0) fMethods.clear();
358 fMVAvalues->resize(
Data()->GetNTrainingEvents(), 0.0);
360 Log() <<
kINFO <<
"Training "<< fBoostNum <<
" " << fBoostedMethodName <<
" with title " << fBoostedMethodTitle <<
" Classifiers ... patience please" <<
Endl;
371 Ssiz_t varTrafoStart=fBoostedMethodOptions.Index(
"~VarTransform=");
372 if (varTrafoStart >0) {
373 Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(
":",varTrafoStart);
374 if (varTrafoEnd<varTrafoStart)
375 varTrafoEnd=fBoostedMethodOptions.Length();
376 fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
381 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
387 Form(
"%s_B%04i", fBoostedMethodTitle.Data(),fCurrentMethodIdx),
389 fBoostedMethodOptions);
393 fCurrentMethod = (
dynamic_cast<MethodBase*
>(method));
395 if (fCurrentMethod==0) {
396 Log() <<
kFATAL <<
"uups.. guess the booking of the " << fCurrentMethodIdx <<
"-th classifier somehow failed" <<
Endl;
404 Log() <<
kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" <<
Endl;
408 fCurrentMethod->SetMsgType(
kWARNING);
409 fCurrentMethod->SetupMethod();
410 fCurrentMethod->ParseOptions();
412 fCurrentMethod->SetAnalysisType( GetAnalysisType() );
413 fCurrentMethod->ProcessSetup();
414 fCurrentMethod->CheckSetup();
418 fCurrentMethod->RerouteTransformationHandler (&(this->GetTransformationHandler()));
422 if (fMonitorBoostedMethod) {
423 methodDir=MethodBaseDir()->
GetDirectory(dirName=
Form(
"%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
425 methodDir=BaseDir()->
mkdir(dirName,dirTitle=
Form(
"Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
427 fCurrentMethod->SetMethodDir(methodDir);
428 fCurrentMethod->BaseDir()->
cd();
437 if (fBoostType==
"Bagging") Bagging();
440 fCurrentMethod->WriteMonitoringHistosToFile();
446 if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
454 SingleBoost(fCurrentMethod);
460 if (fDetailedMonitoring) {
471 fMonitorTree->Fill();
475 Log() <<
kDEBUG <<
"AdaBoost (methodErr) err = " << fMethodError <<
Endl;
476 if (fMethodError > 0.49999) StopCounter++;
477 if (StopCounter > 0 && fBoostType !=
"Bagging") {
479 fBoostNum = fCurrentMethodIdx+1;
480 Log() <<
kINFO <<
"Error rate has reached 0.5 ("<< fMethodError<<
"), boosting process stopped at #" << fBoostNum <<
" classifier" <<
Endl;
482 Log() <<
kINFO <<
"The classifier might be too strong to boost with Beta = " << fAdaBoostBeta <<
", try reducing it." <<
Endl;
492 Timer* timer1=
new Timer( fBoostNum, GetName() );
494 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
499 if (fCurrentMethodIdx==fBoostNum) {
504 TH1F* tmp =
dynamic_cast<TH1F*
>( results->
GetHist(
"ClassifierWeight") );
505 if (tmp) tmp->
SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);
514 if (fMethods.size()==1) fMethodWeight[0] = 1.0;
525 fBoostedMethodOptions=GetOptions();
532 if (fBoostNum <=0)
Log() <<
kFATAL <<
"CreateHistorgrams called before fBoostNum is initialized" <<
Endl;
536 Int_t signalClass = 0;
537 if (DataInfo().GetClassInfo(
"Signal") != 0) {
538 signalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
541 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
548 for (
UInt_t imtd=0; imtd<fBoostNum; imtd++) {
549 fTrainSigMVAHist .push_back(
new TH1F(
Form(
"MVA_Train_S_%04i",imtd),
"MVA_Train_S", fNbins, xmin, xmax ) );
550 fTrainBgdMVAHist .push_back(
new TH1F(
Form(
"MVA_Train_B%04i", imtd),
"MVA_Train_B", fNbins, xmin, xmax ) );
551 fBTrainSigMVAHist.push_back(
new TH1F(
Form(
"MVA_BTrain_S%04i",imtd),
"MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
552 fBTrainBgdMVAHist.push_back(
new TH1F(
Form(
"MVA_BTrain_B%04i",imtd),
"MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
553 fTestSigMVAHist .push_back(
new TH1F(
Form(
"MVA_Test_S%04i", imtd),
"MVA_Test_S", fNbins, xmin, xmax ) );
554 fTestBgdMVAHist .push_back(
new TH1F(
Form(
"MVA_Test_B%04i", imtd),
"MVA_Test_B", fNbins, xmin, xmax ) );
563 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
564 const Event *ev =
Data()->GetEvent(ievt);
574 if (fMonitorBoostedMethod) {
575 for (
UInt_t imtd=0;imtd<fBoostNum;imtd++) {
582 fTrainSigMVAHist[imtd]->SetDirectory(dir);
583 fTrainSigMVAHist[imtd]->Write();
584 fTrainBgdMVAHist[imtd]->SetDirectory(dir);
585 fTrainBgdMVAHist[imtd]->Write();
586 fBTrainSigMVAHist[imtd]->SetDirectory(dir);
587 fBTrainSigMVAHist[imtd]->Write();
588 fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
589 fBTrainBgdMVAHist[imtd]->Write();
596 fMonitorTree->Write();
604 if (fMonitorBoostedMethod) {
605 UInt_t nloop = fTestSigMVAHist.size();
606 if (fMethods.size()<nloop) nloop = fMethods.size();
609 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
610 const Event* ev = GetEvent(ievt);
612 if (DataInfo().IsSignal(ev)) {
613 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
614 fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
618 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
619 fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
633 UInt_t nloop = fTestSigMVAHist.size();
634 if (fMethods.size()<nloop) nloop = fMethods.size();
635 if (fMonitorBoostedMethod) {
637 for (
UInt_t imtd=0;imtd<nloop;imtd++) {
642 if (dir==0)
continue;
644 fTestSigMVAHist[imtd]->SetDirectory(dir);
645 fTestSigMVAHist[imtd]->Write();
646 fTestBgdMVAHist[imtd]->SetDirectory(dir);
647 fTestBgdMVAHist[imtd]->Write();
678 const Int_t nBins=10001;
681 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
685 if (val>maxMVA) maxMVA=val;
686 if (val<minMVA) minMVA=val;
688 maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
692 TH1D *mvaS =
new TH1D(
Form(
"MVAS_%d",fCurrentMethodIdx) ,
"",nBins,minMVA,maxMVA);
693 TH1D *mvaB =
new TH1D(
Form(
"MVAB_%d",fCurrentMethodIdx) ,
"",nBins,minMVA,maxMVA);
694 TH1D *mvaSC =
new TH1D(
Form(
"MVASC_%d",fCurrentMethodIdx),
"",nBins,minMVA,maxMVA);
695 TH1D *mvaBC =
new TH1D(
Form(
"MVABC_%d",fCurrentMethodIdx),
"",nBins,minMVA,maxMVA);
699 if (fDetailedMonitoring){
700 results->
Store(mvaS,
Form(
"MVAS_%d",fCurrentMethodIdx));
701 results->
Store(mvaB,
Form(
"MVAB_%d",fCurrentMethodIdx));
702 results->
Store(mvaSC,
Form(
"MVASC_%d",fCurrentMethodIdx));
703 results->
Store(mvaBC,
Form(
"MVABC_%d",fCurrentMethodIdx));
706 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
708 Double_t weight = GetEvent(ievt)->GetWeight();
711 if (DataInfo().IsSignal(GetEvent(ievt))){
712 mvaS->
Fill(mvaVal,weight);
714 mvaB->
Fill(mvaVal,weight);
748 for (
Int_t ibin=1;ibin<=nBins;ibin++){
759 if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
766 if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
767 else mvaCutOrientation=1;
800 <<
" s2="<<(sTot-sSelCut)
801 <<
" b2="<<(bTot-bSelCut)
802 <<
" s/b(1)=" << sSelCut/bSelCut
803 <<
" s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
804 <<
" index before cut=" << parentIndex
805 <<
" after: left=" << leftIndex
806 <<
" after: right=" << rightIndex
807 <<
" sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
808 <<
" sepGain="<<separationGain
811 <<
" idx="<<fCurrentMethodIdx
812 <<
" cutOrientation="<<mvaCutOrientation
836 if (fBoostType==
"AdaBoost") returnVal = this->AdaBoost (method,1);
837 else if (fBoostType==
"RealAdaBoost") returnVal = this->AdaBoost (method,0);
838 else if (fBoostType==
"Bagging") returnVal = this->Bagging ();
840 Log() <<
kFATAL <<
"<Boost> unknown boost option " << fBoostType<<
" called" <<
Endl;
842 fMethodWeight.push_back(returnVal);
851 Log() <<
kWARNING <<
" AdaBoost called without classifier reference - needed for calulating AdaBoost " <<
Endl;
860 if (discreteAdaBoost) {
871 for (
Long64_t evt=0; evt<GetNEvents(); evt++) {
878 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=
kTRUE;
881 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
882 const Event* ev = GetEvent(ievt);
883 sig=DataInfo().IsSignal(ev);
884 v = fMVAvalues->at(ievt);
887 if (fMonitorBoostedMethod) {
889 fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
893 fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
898 if (discreteAdaBoost){
900 WrongDetection[ievt]=
kFALSE;
902 WrongDetection[ievt]=
kTRUE;
907 mvaProb = 2*(mvaProb-0.5);
909 if (DataInfo().IsSignal(ev)) trueType = 1;
911 sumWrong+= w*trueType*mvaProb;
915 fMethodError=sumWrong/sumAll;
922 if (fMethodError == 0) {
923 Log() <<
kWARNING <<
"Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " <<
Endl;
926 if (discreteAdaBoost)
927 boostWeight =
TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
929 boostWeight =
TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
945 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
946 const Event* ev =
Data()->GetEvent(ievt);
948 if (discreteAdaBoost){
950 if (WrongDetection[ievt] && boostWeight != 0) {
961 mvaProb = 2*(mvaProb-0.5);
965 if (DataInfo().IsSignal(ev)) trueType = 1;
968 boostfactor =
TMath::Exp(-1*boostWeight*trueType*mvaProb);
976 Double_t normWeight = oldSum/newSum;
979 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
980 const Event* ev =
Data()->GetEvent(ievt);
989 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
990 const Event* ev =
Data()->GetEvent(ievt);
997 delete[] WrongDetection;
998 if (MVAProb)
delete MVAProb;
1000 fBoostWeight = boostWeight;
1012 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1013 const Event* ev =
Data()->GetEvent(ievt);
1032 Log() <<
"This method combines several classifier of one species in a "<<
Endl;
1033 Log() <<
"single multivariate quantity via the boost algorithm." <<
Endl;
1034 Log() <<
"the output is a weighted sum over all individual classifiers" <<
Endl;
1035 Log() <<
"By default, the AdaBoost method is employed, which gives " <<
Endl;
1036 Log() <<
"events that were misclassified in the previous tree a larger " <<
Endl;
1037 Log() <<
"weight in the training of the following classifier."<<
Endl;
1038 Log() <<
"Optionally, Bagged boosting can also be applied." <<
Endl;
1042 Log() <<
"The most important parameter in the configuration is the "<<
Endl;
1043 Log() <<
"number of boosts applied (Boost_Num) and the choice of boosting"<<
Endl;
1044 Log() <<
"(Boost_Type), which can be set to either AdaBoost or Bagging." <<
Endl;
1045 Log() <<
"AdaBoosting: The most important parameters in this configuration" <<
Endl;
1046 Log() <<
"is the beta parameter (Boost_AdaBoostBeta) " <<
Endl;
1047 Log() <<
"When boosting a linear classifier, it is sometimes advantageous"<<
Endl;
1048 Log() <<
"to transform the MVA output non-linearly. The following options" <<
Endl;
1049 Log() <<
"are available: step, log, and minmax, the default is no transform."<<
Endl;
1051 Log() <<
"Some classifiers are hard to boost and do not improve much in"<<
Endl;
1052 Log() <<
"their performance by boosting them, some even slightly deteriorate"<<
Endl;
1053 Log() <<
"due to the boosting." <<
Endl;
1054 Log() <<
"The booking of the boost method is special since it requires"<<
Endl;
1055 Log() <<
"the booing of the method to be boosted and the boost itself."<<
Endl;
1056 Log() <<
"This is solved by booking the method to be boosted and to add"<<
Endl;
1057 Log() <<
"all Boost parameters, which all begin with \"Boost_\" to the"<<
Endl;
1058 Log() <<
"options string. The factory separates the options and initiates"<<
Endl;
1059 Log() <<
"the boost process. The TMVA macro directory contains the example"<<
Endl;
1060 Log() <<
"macro \"Boost.C\"" <<
Endl;
1079 for (
UInt_t i=0;i< fMethods.size(); i++){
1086 if (fTransformString ==
"linear"){
1089 else if (fTransformString ==
"log"){
1090 if (val < sigcut) val = sigcut;
1094 else if (fTransformString ==
"step" ){
1098 else if (fTransformString ==
"gauss"){
1102 Log() <<
kFATAL <<
"error unknown transformation " << fTransformString<<
Endl;
1104 mvaValue+=val*fMethodWeight[i];
1105 norm +=fMethodWeight[i];
1110 NoErrorCalc(err, errUpper);
1137 Data()->SetCurrentType(eTT);
1143 if (singleMethod && !method) {
1144 Log() <<
kFATAL <<
" What do you do? Your method:"
1145 << fMethods.back()->GetName()
1146 <<
" seems not to be a propper TMVA method"
1155 std::vector<Double_t> OldMethodWeight(fMethodWeight);
1156 if (!singleMethod) {
1159 for (
UInt_t i=0; i<=fCurrentMethodIdx; i++)
1160 AllMethodsWeight += fMethodWeight.at(i);
1162 if (AllMethodsWeight != 0.0) {
1163 for (
UInt_t i=0; i<=fCurrentMethodIdx; i++)
1164 fMethodWeight[i] /= AllMethodsWeight;
1170 std::vector <Float_t>* mvaRes;
1172 mvaRes = fMVAvalues;
1174 mvaRes =
new std::vector <Float_t>(GetNEvents());
1175 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1177 (*mvaRes)[ievt] = singleMethod ? method->
GetMvaValue(&err) : GetMvaValue(&err);
1183 fMethodWeight = OldMethodWeight;
1186 Int_t signalClass = 0;
1187 if (DataInfo().GetClassInfo(
"Signal") != 0) {
1188 signalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
1191 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1198 TH1* mva_s =
new TH1F(
"MVA_S",
"MVA_S", fNbins, xmin, xmax );
1199 TH1* mva_b =
new TH1F(
"MVA_B",
"MVA_B", fNbins, xmin, xmax );
1200 TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1201 if (CalcOverlapIntergral) {
1202 mva_s_overlap =
new TH1F(
"MVA_S_OVERLAP",
"MVA_S_OVERLAP", fNbins, xmin, xmax );
1203 mva_b_overlap =
new TH1F(
"MVA_B_OVERLAP",
"MVA_B_OVERLAP", fNbins, xmin, xmax );
1205 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1206 const Event* ev = GetEvent(ievt);
1208 if (DataInfo().IsSignal(ev)) mva_s->
Fill( (*mvaRes)[ievt], w );
1209 else mva_b->
Fill( (*mvaRes)[ievt], w );
1211 if (CalcOverlapIntergral) {
1213 if (DataInfo().IsSignal(ev))
1214 mva_s_overlap->
Fill( (*mvaRes)[ievt], w_ov );
1216 mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1228 if (CalcOverlapIntergral) {
1232 fOverlap_integral = 0.0;
1235 Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1236 if (bc_s > 0.0 && bc_b > 0.0)
1240 delete mva_s_overlap;
1241 delete mva_b_overlap;
1263 Log() <<
kFATAL <<
"dynamic cast to MethodBase* failed" <<
Endl;
1267 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1292 results->
Store(
new TH1I(
"NodesBeforePruning",
"nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),
"NodesBeforePruning");
1293 results->
Store(
new TH1I(
"NodesAfterPruning",
"nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),
"NodesAfterPruning");
1306 Log() <<
kINFO <<
"<Train> average number of nodes before/after pruning : "
1318 if (methodIndex < 3){
1319 Log() <<
kINFO <<
"No detailed boost monitoring for "
1320 << GetCurrentMethod(methodIndex)->GetMethodName()
1321 <<
" yet available " <<
Endl;
1329 if (fDetailedMonitoring){
1331 if (DataInfo().GetNVariables() == 2) {
1332 results->
Store(
new TH2F(
Form(
"EventDistSig_%d",methodIndex),
Form(
"EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1334 results->
Store(
new TH2F(
Form(
"EventDistBkg_%d",methodIndex),
Form(
"EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1338 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1339 const Event* ev = GetEvent(ievt);
1345 if (DataInfo().IsSignal(ev)) h=results->
GetHist2D(
Form(
"EventDistSig_%d",methodIndex));
1346 else h=results->
GetHist2D(
Form(
"EventDistBkg_%d",methodIndex));
1347 if (h) h->
Fill(v0,v1,w);
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton; creates the instance if needed
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Random number generator class based on M.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
MsgLogger & Endl(MsgLogger &ml)
TH1 * GetHist(const TString &alias) const
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
void SingleTrain()
initialization
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
void SetBoostWeight(Double_t w) const
1-D histogram with a float per channel (see TH1 documentation)}
Short_t Min(Short_t a, Short_t b)
void ToLower()
Change string to lower-case.
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory and return a pointer to the created directory.
const Ranking * CreateRanking()
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
virtual Int_t GetNbinsX() const
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
TH2 * GetHist2D(const TString &alias) const
static Types & Instance()
returns the single instance of "Types" if it exists already, or creates it (Singleton)
static void InhibitOutput()
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=NULL)
virtual Double_t GetBinLowEdge(Int_t bin) const
return bin lower edge for 1D histogram; better to use h1.GetXaxis().GetBinLowEdge(bin)
void AddEvent(Double_t val, Double_t weight, Int_t type)
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
virtual ~MethodBoost(void)
destructor
void ScaleBoostWeight(Double_t s) const
virtual void SetMarkerColor(Color_t mcolor=1)
std::vector< std::vector< double > > Data
void GetHelpMessage() const
Get help message text.
Types::EMVA GetMethodType() const
Double_t GetOriginalWeight() const
TString GetElapsedTime(Bool_t Scientific=kTRUE)
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
RooCmdArg Timer(Bool_t flag=kTRUE)
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
std::string GetMethodName(TCppMethod_t)
Service class for 2-Dim histogram classes.
TString GetMethodName(Types::EMVA method) const
1-D histogram with a int per channel (see TH1 documentation)}
2-D histogram with a float per channel (see TH1 documentation)}
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
1-D histogram with a double per channel (see TH1 documentation)}
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
virtual Double_t GetSeparationGain(const Double_t &nSelS, const Double_t &nSelB, const Double_t &nTotS, const Double_t &nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Describe directory structure in memory.
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Int_t GetNNodesBeforePruning()
virtual void TestClassification()
initialization
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file; dummy implementation here
#define REGISTER_METHOD(CLASS)
for example
std::vector< IMethod * > fMethods
Double_t GetMVAProbAt(Double_t value)
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual Double_t GetSeparationIndex(const Double_t &s, const Double_t &b)=0
Short_t Max(Short_t a, Short_t b)
void InitHistos()
initialisation routine
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
A TTree object has a header with a name and a title.
Double_t GetSignalReferenceCut() const
void Store(TObject *obj, const char *alias=0)
static void EnableOutput()
Int_t Fill(Double_t)
Invalid Fill method.
virtual void SetTitle(const char *title="")
Change (i.e. set) the title of the TNamed.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
double norm(double *x, double *p)
virtual void TestClassification()
initialization
void SetSignalReferenceCut(Double_t cut)
ClassImp(TMVA::MethodBoost) TMVA