96 , fDetailedMonitoring(
kFALSE)
99 , fBaggedSampleFraction(0)
100 , fBoostedMethodTitle(methodTitle)
101 , fBoostedMethodOptions(theOption)
102 , fMonitorBoostedMethod(
kFALSE)
107 , fOverlap_integral(0.0)
121 , fDetailedMonitoring(
kFALSE)
124 , fBaggedSampleFraction(0)
125 , fBoostedMethodTitle(
"")
126 , fBoostedMethodOptions(
"")
127 , fMonitorBoostedMethod(
kFALSE)
132 , fOverlap_integral(0.0)
145 fMethodWeight.clear();
149 fTrainSigMVAHist.clear();
150 fTrainBgdMVAHist.clear();
151 fBTrainSigMVAHist.clear();
152 fBTrainBgdMVAHist.clear();
153 fTestSigMVAHist.clear();
154 fTestBgdMVAHist.clear();
178 DeclareOptionRef( fBoostNum = 1,
"Boost_Num",
179 "Number of times the classifier is boosted" );
181 DeclareOptionRef( fMonitorBoostedMethod =
kTRUE,
"Boost_MonitorMethod",
182 "Write monitoring histograms for each boosted classifier" );
184 DeclareOptionRef( fDetailedMonitoring =
kFALSE,
"Boost_DetailedMonitoring",
185 "Produce histograms for detailed boost monitoring" );
187 DeclareOptionRef( fBoostType =
"AdaBoost",
"Boost_Type",
"Boosting type for the classifiers" );
188 AddPreDefVal(
TString(
"RealAdaBoost"));
189 AddPreDefVal(
TString(
"AdaBoost"));
190 AddPreDefVal(
TString(
"Bagging"));
192 DeclareOptionRef(fBaggedSampleFraction=.6,
"Boost_BaggedSampleFraction",
"Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
194 DeclareOptionRef( fAdaBoostBeta = 1.0,
"Boost_AdaBoostBeta",
195 "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
197 DeclareOptionRef( fTransformString =
"step",
"Boost_Transform",
198 "Type of transform applied to every boosted method linear, log, step" );
200 AddPreDefVal(
TString(
"linear"));
202 AddPreDefVal(
TString(
"gauss"));
204 DeclareOptionRef( fRandomSeed = 0,
"Boost_RandomSeed",
205 "Seed for random number generator used for bagging" );
220 DeclareOptionRef( fHistoricOption =
"ByError",
"Boost_MethodWeightType",
221 "How to set the final weight of the boosted classifiers" );
222 AddPreDefVal(
TString(
"ByError"));
223 AddPreDefVal(
TString(
"Average"));
224 AddPreDefVal(
TString(
"ByROC"));
225 AddPreDefVal(
TString(
"ByOverlap"));
226 AddPreDefVal(
TString(
"LastMethod"));
228 DeclareOptionRef( fHistoricOption =
"step",
"Boost_Transform",
229 "Type of transform applied to every boosted method linear, log, step" );
231 AddPreDefVal(
TString(
"linear"));
233 AddPreDefVal(
TString(
"gauss"));
238 AddPreDefVal(
TString(
"HighEdgeGauss"));
239 AddPreDefVal(
TString(
"HighEdgeCoPara"));
242 DeclareOptionRef( fHistoricBoolOption,
"Boost_RecalculateMVACut",
243 "Recalculate the classifier MVA Signallike cut at every boost iteration" );
253 fBoostedMethodTitle = methodTitle;
254 fBoostedMethodOptions = theOption;
276 results->
Store(
new TH1F(
"MethodWeight",
"Normalized Classifier Weight",fBoostNum,0,fBoostNum),
"ClassifierWeight");
277 results->
Store(
new TH1F(
"BoostWeight",
"Boost Weight",fBoostNum,0,fBoostNum),
"BoostWeight");
278 results->
Store(
new TH1F(
"ErrFraction",
"Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),
"ErrorFraction");
279 if (fDetailedMonitoring){
280 results->
Store(
new TH1F(
"ROCIntegral_test",
"ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),
"ROCIntegral_test");
281 results->
Store(
new TH1F(
"ROCIntegralBoosted_test",
"ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),
"ROCIntegralBoosted_test");
282 results->
Store(
new TH1F(
"ROCIntegral_train",
"ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),
"ROCIntegral_train");
283 results->
Store(
new TH1F(
"ROCIntegralBoosted_train",
"ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),
"ROCIntegralBoosted_train");
284 results->
Store(
new TH1F(
"OverlapIntegal_train",
"Overlap integral (training sample)",fBoostNum,0,fBoostNum),
"Overlap");
294 if (fDetailedMonitoring){
307 results->
Store(
new TH1F(
"SoverBtotal",
"S/B in reweighted training sample",fBoostNum,0,fBoostNum),
"SoverBtotal");
311 results->
Store(
new TH1F(
"SeparationGain",
"SeparationGain",fBoostNum,0,fBoostNum),
"SeparationGain");
317 fMonitorTree=
new TTree(
"MonitorBoost",
"Boost variables");
318 fMonitorTree->Branch(
"iMethod",&fCurrentMethodIdx,
"iMethod/I");
319 fMonitorTree->Branch(
"boostWeight",&fBoostWeight,
"boostWeight/D");
320 fMonitorTree->Branch(
"errorFraction",&fMethodError,
"errorFraction/D");
321 fMonitorBoostedMethod =
kTRUE;
330 Log() << kDEBUG <<
"CheckSetup: fBoostType="<<fBoostType <<
Endl;
331 Log() << kDEBUG <<
"CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<
Endl;
332 Log() << kDEBUG <<
"CheckSetup: fBoostWeight="<<fBoostWeight<<
Endl;
333 Log() << kDEBUG <<
"CheckSetup: fMethodError="<<fMethodError<<
Endl;
334 Log() << kDEBUG <<
"CheckSetup: fBoostNum="<<fBoostNum <<
Endl;
335 Log() << kDEBUG <<
"CheckSetup: fRandomSeed=" << fRandomSeed<<
Endl;
336 Log() << kDEBUG <<
"CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<
Endl;
337 Log() << kDEBUG <<
"CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<
Endl;
338 Log() << kDEBUG <<
"CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod?
"true" :
"false") <<
Endl;
339 Log() << kDEBUG <<
"CheckSetup: MName=" << fBoostedMethodName <<
" Title="<< fBoostedMethodTitle<<
Endl;
340 Log() << kDEBUG <<
"CheckSetup: MOptions="<< fBoostedMethodOptions <<
Endl;
341 Log() << kDEBUG <<
"CheckSetup: fMonitorTree=" << fMonitorTree <<
Endl;
342 Log() << kDEBUG <<
"CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx <<
Endl;
343 if (fMethods.size()>0) Log() << kDEBUG <<
"CheckSetup: fMethods[0]" <<fMethods[0]<<
Endl;
344 Log() << kDEBUG <<
"CheckSetup: fMethodWeight.size()" << fMethodWeight.size() <<
Endl;
345 if (fMethodWeight.size()>0) Log() << kDEBUG <<
"CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<
Endl;
346 Log() << kDEBUG <<
"CheckSetup: trying to repair things" <<
Endl;
361 if (Data()->GetNTrainingEvents()==0) Log() << kFATAL <<
"<Train> Data() has zero events" <<
Endl;
364 if (fMethods.size() > 0) fMethods.clear();
365 fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);
367 Log() << kINFO <<
"Training "<< fBoostNum <<
" " << fBoostedMethodName <<
" with title " << fBoostedMethodTitle <<
" Classifiers ... patience please" <<
Endl;
368 Timer timer( fBoostNum, GetName() );
378 Ssiz_t varTrafoStart=fBoostedMethodOptions.Index(
"~VarTransform=");
379 if (varTrafoStart >0) {
380 Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(
":",varTrafoStart);
381 if (varTrafoEnd<varTrafoStart)
382 varTrafoEnd=fBoostedMethodOptions.Length();
383 fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
388 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
393 fBoostedMethodName.Data(), GetJobName(),
Form(
"%s_B%04i", fBoostedMethodTitle.Data(), fCurrentMethodIdx),
394 DataInfo(), fBoostedMethodOptions);
398 fCurrentMethod = (
dynamic_cast<MethodBase*
>(method));
400 if (fCurrentMethod==0) {
401 Log() << kFATAL <<
"uups.. guess the booking of the " << fCurrentMethodIdx <<
"-th classifier somehow failed" <<
Endl;
409 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" <<
Endl;
413 fCurrentMethod->SetMsgType(kWARNING);
414 fCurrentMethod->SetupMethod();
415 fCurrentMethod->ParseOptions();
417 fCurrentMethod->SetAnalysisType( GetAnalysisType() );
418 fCurrentMethod->ProcessSetup();
419 fCurrentMethod->CheckSetup();
423 fCurrentMethod->RerouteTransformationHandler (&(this->GetTransformationHandler()));
429 if (fMonitorBoostedMethod) {
430 methodDir=GetFile()->
GetDirectory(dirName=
Form(
"%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
432 methodDir=BaseDir()->
mkdir(dirName,dirTitle=
Form(
"Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
434 fCurrentMethod->SetMethodDir(methodDir);
435 fCurrentMethod->BaseDir()->
cd();
445 if (fBoostType==
"Bagging") Bagging();
448 if(!IsSilentFile())fCurrentMethod->WriteMonitoringHistosToFile();
454 if(!IsSilentFile())
if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
462 SingleBoost(fCurrentMethod);
468 if (fDetailedMonitoring) {
479 fMonitorTree->Fill();
483 Log() << kDEBUG <<
"AdaBoost (methodErr) err = " << fMethodError <<
Endl;
484 if (fMethodError > 0.49999) StopCounter++;
485 if (StopCounter > 0 && fBoostType !=
"Bagging") {
487 fBoostNum = fCurrentMethodIdx+1;
488 Log() << kINFO <<
"Error rate has reached 0.5 ("<< fMethodError<<
"), boosting process stopped at #" << fBoostNum <<
" classifier" <<
Endl;
490 Log() << kINFO <<
"The classifier might be too strong to boost with Beta = " << fAdaBoostBeta <<
", try reducing it." <<
Endl;
500 Timer* timer1=
new Timer( fBoostNum, GetName() );
502 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
507 if (fCurrentMethodIdx==fBoostNum) {
512 TH1F* tmp =
dynamic_cast<TH1F*
>( results->
GetHist(
"ClassifierWeight") );
513 if (tmp) tmp->
SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);
522 if (fMethods.size()==1) fMethodWeight[0] = 1.0;
533 fBoostedMethodOptions=GetOptions();
540 if (fBoostNum <=0) Log() << kFATAL <<
"CreateHistograms called before fBoostNum is initialized" <<
Endl;
544 Int_t signalClass = 0;
545 if (DataInfo().GetClassInfo(
"Signal") != 0) {
546 signalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
549 meanS, meanB, rmsS, rmsB,
xmin,
xmax, signalClass );
556 for (
UInt_t imtd=0; imtd<fBoostNum; imtd++) {
557 fTrainSigMVAHist .push_back(
new TH1F(
Form(
"MVA_Train_S_%04i",imtd),
"MVA_Train_S", fNbins,
xmin,
xmax ) );
558 fTrainBgdMVAHist .push_back(
new TH1F(
Form(
"MVA_Train_B%04i", imtd),
"MVA_Train_B", fNbins,
xmin,
xmax ) );
559 fBTrainSigMVAHist.push_back(
new TH1F(
Form(
"MVA_BTrain_S%04i",imtd),
"MVA_BoostedTrain_S", fNbins,
xmin,
xmax ) );
560 fBTrainBgdMVAHist.push_back(
new TH1F(
Form(
"MVA_BTrain_B%04i",imtd),
"MVA_BoostedTrain_B", fNbins,
xmin,
xmax ) );
561 fTestSigMVAHist .push_back(
new TH1F(
Form(
"MVA_Test_S%04i", imtd),
"MVA_Test_S", fNbins,
xmin,
xmax ) );
562 fTestBgdMVAHist .push_back(
new TH1F(
Form(
"MVA_Test_B%04i", imtd),
"MVA_Test_B", fNbins,
xmin,
xmax ) );
571 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
572 const Event *ev = Data()->GetEvent(ievt);
582 if (fMonitorBoostedMethod) {
583 for (
UInt_t imtd=0;imtd<fBoostNum;imtd++) {
590 fTrainSigMVAHist[imtd]->SetDirectory(dir);
591 fTrainSigMVAHist[imtd]->Write();
592 fTrainBgdMVAHist[imtd]->SetDirectory(dir);
593 fTrainBgdMVAHist[imtd]->Write();
594 fBTrainSigMVAHist[imtd]->SetDirectory(dir);
595 fBTrainSigMVAHist[imtd]->Write();
596 fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
597 fBTrainBgdMVAHist[imtd]->Write();
604 fMonitorTree->Write();
612 if (fMonitorBoostedMethod) {
613 UInt_t nloop = fTestSigMVAHist.size();
614 if (fMethods.size()<nloop) nloop = fMethods.size();
617 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
618 const Event* ev = GetEvent(ievt);
620 if (DataInfo().IsSignal(ev)) {
621 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
622 fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
626 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
627 fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
641 UInt_t nloop = fTestSigMVAHist.size();
642 if (fMethods.size()<nloop) nloop = fMethods.size();
643 if (fMonitorBoostedMethod) {
645 for (
UInt_t imtd=0;imtd<nloop;imtd++) {
650 if (dir==0)
continue;
652 fTestSigMVAHist[imtd]->SetDirectory(dir);
653 fTestSigMVAHist[imtd]->Write();
654 fTestBgdMVAHist[imtd]->SetDirectory(dir);
655 fTestBgdMVAHist[imtd]->Write();
676 if(IsModelPersistence()){
677 TString _fFileDir= DataInfo().GetName();
695 const Int_t nBins=10001;
698 for (
Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
702 if (val>maxMVA) maxMVA=val;
703 if (val<minMVA) minMVA=val;
705 maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
709 TH1D *mvaS =
new TH1D(
Form(
"MVAS_%d",fCurrentMethodIdx) ,
"",nBins,minMVA,maxMVA);
710 TH1D *mvaB =
new TH1D(
Form(
"MVAB_%d",fCurrentMethodIdx) ,
"",nBins,minMVA,maxMVA);
711 TH1D *mvaSC =
new TH1D(
Form(
"MVASC_%d",fCurrentMethodIdx),
"",nBins,minMVA,maxMVA);
712 TH1D *mvaBC =
new TH1D(
Form(
"MVABC_%d",fCurrentMethodIdx),
"",nBins,minMVA,maxMVA);
716 if (fDetailedMonitoring){
717 results->
Store(mvaS,
Form(
"MVAS_%d",fCurrentMethodIdx));
718 results->
Store(mvaB,
Form(
"MVAB_%d",fCurrentMethodIdx));
719 results->
Store(mvaSC,
Form(
"MVASC_%d",fCurrentMethodIdx));
720 results->
Store(mvaBC,
Form(
"MVABC_%d",fCurrentMethodIdx));
723 for (
Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
725 Double_t weight = GetEvent(ievt)->GetWeight();
728 if (DataInfo().IsSignal(GetEvent(ievt))){
729 mvaS->
Fill(mvaVal,weight);
731 mvaB->
Fill(mvaVal,weight);
767 for (
Int_t ibin=1;ibin<=nBins;ibin++){
778 if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
785 if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
786 else mvaCutOrientation=1;
819 <<
" s2="<<(sTot-sSelCut)
820 <<
" b2="<<(bTot-bSelCut)
821 <<
" s/b(1)=" << sSelCut/bSelCut
822 <<
" s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
823 <<
" index before cut=" << parentIndex
824 <<
" after: left=" << leftIndex
825 <<
" after: right=" << rightIndex
826 <<
" sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
827 <<
" sepGain="<<separationGain
830 <<
" idx="<<fCurrentMethodIdx
831 <<
" cutOrientation="<<mvaCutOrientation
858 if (fBoostType==
"AdaBoost") returnVal = this->AdaBoost (method,1);
859 else if (fBoostType==
"RealAdaBoost") returnVal = this->AdaBoost (method,0);
860 else if (fBoostType==
"Bagging") returnVal = this->Bagging ();
862 Log() << kFATAL <<
"<Boost> unknown boost option " << fBoostType<<
" called" <<
Endl;
864 fMethodWeight.push_back(returnVal);
873 Log() << kWARNING <<
" AdaBoost called without classifier reference - needed for calculating AdaBoost " <<
Endl;
882 if (discreteAdaBoost) {
893 for (
Long64_t evt=0; evt<GetNEvents(); evt++) {
894 const Event* ev = Data()->GetEvent(evt);
900 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=
kTRUE;
903 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
904 const Event* ev = GetEvent(ievt);
905 sig=DataInfo().IsSignal(ev);
906 v = fMVAvalues->at(ievt);
911 if (fMonitorBoostedMethod) {
913 fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(
v,w);
917 fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(
v,w);
923 if (discreteAdaBoost){
925 WrongDetection[ievt]=
kFALSE;
927 WrongDetection[ievt]=
kTRUE;
932 mvaProb = 2*(mvaProb-0.5);
934 if (DataInfo().IsSignal(ev)) trueType = 1;
936 sumWrong+= w*trueType*mvaProb;
940 fMethodError=sumWrong/sumAll;
947 if (fMethodError == 0) {
948 Log() << kWARNING <<
"Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " <<
Endl;
951 if (discreteAdaBoost)
952 boostWeight =
TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
954 boostWeight =
TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
970 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
971 const Event* ev = Data()->GetEvent(ievt);
973 if (discreteAdaBoost){
975 if (WrongDetection[ievt] && boostWeight != 0) {
986 mvaProb = 2*(mvaProb-0.5);
990 if (DataInfo().IsSignal(ev)) trueType = 1;
993 boostfactor =
TMath::Exp(-1*boostWeight*trueType*mvaProb);
1001 Double_t normWeight = oldSum/newSum;
1004 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1005 const Event* ev = Data()->GetEvent(ievt);
1014 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1015 const Event* ev = Data()->GetEvent(ievt);
1022 delete[] WrongDetection;
1023 if (MVAProb)
delete MVAProb;
1025 fBoostWeight = boostWeight;
1037 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1038 const Event* ev = Data()->GetEvent(ievt);
1057 Log() <<
"This method combines several classifier of one species in a "<<
Endl;
1058 Log() <<
"single multivariate quantity via the boost algorithm." <<
Endl;
1059 Log() <<
"the output is a weighted sum over all individual classifiers" <<
Endl;
1060 Log() <<
"By default, the AdaBoost method is employed, which gives " <<
Endl;
1061 Log() <<
"events that were misclassified in the previous tree a larger " <<
Endl;
1062 Log() <<
"weight in the training of the following classifier."<<
Endl;
1063 Log() <<
"Optionally, Bagged boosting can also be applied." <<
Endl;
1067 Log() <<
"The most important parameter in the configuration is the "<<
Endl;
1068 Log() <<
"number of boosts applied (Boost_Num) and the choice of boosting"<<
Endl;
1069 Log() <<
"(Boost_Type), which can be set to either AdaBoost or Bagging." <<
Endl;
1070 Log() <<
"AdaBoosting: The most important parameters in this configuration" <<
Endl;
1071 Log() <<
"is the beta parameter (Boost_AdaBoostBeta) " <<
Endl;
1072 Log() <<
"When boosting a linear classifier, it is sometimes advantageous"<<
Endl;
1073 Log() <<
"to transform the MVA output non-linearly. The following options" <<
Endl;
1074 Log() <<
"are available: step, log, and minmax, the default is no transform."<<
Endl;
1076 Log() <<
"Some classifiers are hard to boost and do not improve much in"<<
Endl;
1077 Log() <<
"their performance by boosting them, some even slightly deteriorate"<<
Endl;
1078 Log() <<
"due to the boosting." <<
Endl;
1079 Log() <<
"The booking of the boost method is special since it requires"<<
Endl;
1080 Log() <<
"the booing of the method to be boosted and the boost itself."<<
Endl;
1081 Log() <<
"This is solved by booking the method to be boosted and to add"<<
Endl;
1082 Log() <<
"all Boost parameters, which all begin with \"Boost_\" to the"<<
Endl;
1083 Log() <<
"options string. The factory separates the options and initiates"<<
Endl;
1084 Log() <<
"the boost process. The TMVA macro directory contains the example"<<
Endl;
1085 Log() <<
"macro \"Boost.C\"" <<
Endl;
1104 for (
UInt_t i=0;i< fMethods.size(); i++){
1108 Double_t sigcut =
m->GetSignalReferenceCut();
1111 if (fTransformString ==
"linear"){
1114 else if (fTransformString ==
"log"){
1115 if (val < sigcut) val = sigcut;
1119 else if (fTransformString ==
"step" ){
1120 if (
m->IsSignalLike(val)) val = 1.;
1123 else if (fTransformString ==
"gauss"){
1127 Log() << kFATAL <<
"error unknown transformation " << fTransformString<<
Endl;
1129 mvaValue+=val*fMethodWeight[i];
1130 norm +=fMethodWeight[i];
1135 NoErrorCalc(err, errUpper);
1162 Data()->SetCurrentType(eTT);
1168 if (singleMethod && !method) {
1169 Log() << kFATAL <<
" What do you do? Your method:"
1170 << fMethods.back()->GetName()
1171 <<
" seems not to be a propper TMVA method"
1180 std::vector<Double_t> OldMethodWeight(fMethodWeight);
1181 if (!singleMethod) {
1184 for (
UInt_t i=0; i<=fCurrentMethodIdx; i++)
1185 AllMethodsWeight += fMethodWeight.at(i);
1187 if (AllMethodsWeight != 0.0) {
1188 for (
UInt_t i=0; i<=fCurrentMethodIdx; i++)
1189 fMethodWeight[i] /= AllMethodsWeight;
1195 std::vector <Float_t>* mvaRes;
1197 mvaRes = fMVAvalues;
1199 mvaRes =
new std::vector <Float_t>(GetNEvents());
1200 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1202 (*mvaRes)[ievt] = singleMethod ? method->
GetMvaValue(&err) : GetMvaValue(&err);
1208 fMethodWeight = OldMethodWeight;
1211 Int_t signalClass = 0;
1212 if (DataInfo().GetClassInfo(
"Signal") != 0) {
1213 signalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
1216 meanS, meanB, rmsS, rmsB,
xmin,
xmax, signalClass );
1225 TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1226 if (CalcOverlapIntergral) {
1227 mva_s_overlap =
new TH1F(
"MVA_S_OVERLAP",
"MVA_S_OVERLAP", fNbins,
xmin,
xmax );
1228 mva_b_overlap =
new TH1F(
"MVA_B_OVERLAP",
"MVA_B_OVERLAP", fNbins,
xmin,
xmax );
1230 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1231 const Event* ev = GetEvent(ievt);
1233 if (DataInfo().IsSignal(ev)) mva_s->
Fill( (*mvaRes)[ievt], w );
1234 else mva_b->
Fill( (*mvaRes)[ievt], w );
1236 if (CalcOverlapIntergral) {
1238 if (DataInfo().IsSignal(ev))
1239 mva_s_overlap->
Fill( (*mvaRes)[ievt], w_ov );
1241 mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1253 if (CalcOverlapIntergral) {
1257 fOverlap_integral = 0.0;
1260 Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1261 if (bc_s > 0.0 && bc_b > 0.0)
1265 delete mva_s_overlap;
1266 delete mva_b_overlap;
1288 Log() << kFATAL <<
"dynamic cast to MethodBase* failed" <<
Endl;
1292 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1316 results->
Store(
new TH1I(
"NodesBeforePruning",
"nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),
"NodesBeforePruning");
1317 results->
Store(
new TH1I(
"NodesAfterPruning",
"nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),
"NodesAfterPruning");
1330 Log() << kINFO <<
"<Train> average number of nodes before/after pruning : "
1342 if (methodIndex < 3){
1343 Log() << kDEBUG <<
"No detailed boost monitoring for "
1344 << GetCurrentMethod(methodIndex)->GetMethodName()
1345 <<
" yet available " <<
Endl;
1353 if (fDetailedMonitoring){
1355 if (DataInfo().GetNVariables() == 2) {
1356 results->
Store(
new TH2F(
Form(
"EventDistSig_%d",methodIndex),
Form(
"EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1358 results->
Store(
new TH2F(
Form(
"EventDistBkg_%d",methodIndex),
Form(
"EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1362 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1363 const Event* ev = GetEvent(ievt);
1369 if (DataInfo().IsSignal(ev))
h=results->
GetHist2D(
Form(
"EventDistSig_%d",methodIndex));
#define REGISTER_METHOD(CLASS)
for example
char * Form(const char *fmt,...)
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Describe directory structure in memory.
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using a path.
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
virtual Bool_t cd(const char *path=nullptr)
Change current directory to "this" directory.
1-D histogram with a double per channel (see TH1 documentation)
1-D histogram with a float per channel (see TH1 documentation)
1-D histogram with an int per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
virtual Int_t GetNbinsX() const
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
2-D histogram with a float per channel (see TH1 documentation)
Service class for 2-Dim histogram classes.
Int_t Fill(Double_t)
Invalid Fill method.
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
class TMVA::Config::VariablePlotting fVariablePlotting
Class that contains all the data information.
void ScaleBoostWeight(Double_t s) const
void SetBoostWeight(Double_t w) const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Double_t GetOriginalWeight() const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Implementation of the GiniIndex as separation criterion.
Interface for all concrete MVA method implementations.
Virtual base class for all MVA methods.
void SetSilentFile(Bool_t status)
void SetWeightFileDir(TString fileDir)
set directory of weight file
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
virtual void TestClassification()
initialization
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
Types::EMVA GetMethodType() const
void SetSignalReferenceCut(Double_t cut)
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
void SetModelPersistence(Bool_t status)
Double_t GetSignalReferenceCut() const
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification
Class for boosting a TMVA method.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
void SingleTrain()
initialization
DataSetManager * fDataSetManager
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
void CreateMVAHistorgrams()
Bool_t fHistoricBoolOption
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file (dummy implementation here)
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
virtual void TestClassification()
initialization
void InitHistos()
initialisation routine
void ProcessOptions()
process user options
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Double_t SingleBoost(MethodBase *method)
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
const Ranking * CreateRanking()
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
std::vector< Float_t > * fMVAvalues
virtual ~MethodBoost(void)
destructor
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
void GetHelpMessage() const
Get help message text.
Class for categorizing the phase space.
DataSetManager * fDataSetManager
Virtual base class for combining several TMVA methods.
std::vector< IMethod * > fMethods
Analysis of Boosted Decision Trees.
Int_t GetNNodesBeforePruning()
static void InhibitOutput()
static void EnableOutput()
PDF wrapper for histograms; uses user-defined spline interpolation.
Double_t GetMVAProbAt(Double_t value)
void AddEvent(Double_t val, Double_t weight, Int_t type)
Ranking for variables in method (implementation)
Class that is the base class for a vector of results.
TH2 * GetHist2D(const TString &alias) const
TH1 * GetHist(const TString &alias) const
void Store(TObject *obj, const char *alias=0)
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Singleton class for Global types used by TMVA.
TString GetMethodName(Types::EMVA method) const
static Types & Instance()
return the single instance of "Types" if it already exists, or create it (Singleton)
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
virtual void Delete(Option_t *option="")
Delete this object.
Random number generator class based on M.
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
void ToLower()
Change string to lower-case.
A TTree represents a columnar dataset.
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Short_t Max(Short_t a, Short_t b)
Short_t Min(Short_t a, Short_t b)
static uint64_t sum(uint64_t i)