MethodBoost.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Or Cohen, Jan Therhaag, Eckhard von Toerne

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodBoost                                                           *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Class for boosting a single TMVA method                                   *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Andreas Hoecker   <Andreas.Hocker@cern.ch>    - CERN, Switzerland         *
 *      Peter Speckmayer  <Peter.Speckmazer@cern.ch>  - CERN, Switzerland         *
 *      Joerg Stelzer     <Joerg.Stelzer@cern.ch>     - CERN, Switzerland         *
 *      Helge Voss        <Helge.Voss@cern.ch>        - MPI-K Heidelberg, Germany *
 *      Jan Therhaag      <Jan.Therhaag@cern.ch>      - U of Bonn, Germany        *
 *      Eckhard v. Toerne <evt@uni-bonn.de>           - U of Bonn, Germany        *
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

/*! \class TMVA::MethodBoost
\ingroup TMVA

Class for boosting a TMVA method

This class boosts a single classifier. Boosting means training the
classifier several times; after each pass the weights of the events
are modified according to how well the classifier performed on the
training sample.
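
A boosted method is booked through the Factory by booking the method to be
boosted and appending the `Boost_*` parameters to its option string; the
factory splits off the boost options and drives the boost process. A minimal
sketch, assuming the usual `factory`/`dataloader` setup from the TMVA
tutorial macros:

~~~ {.cpp}
factory->BookMethod( dataloader, TMVA::Types::kFisher, "BoostedFisher",
                     "H:!V:Boost_Num=20:Boost_Type=AdaBoost:"
                     "Boost_AdaBoostBeta=1.0:Boost_Transform=step" );
~~~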
*/

#include "TMVA/MethodBoost.h"

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/Configurable.h"
#include "TMVA/DataSet.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"
#include "TMVA/MethodCategory.h"
#include "TMVA/MethodCompositeBase.h"
#include "TMVA/MethodDT.h"
#include "TMVA/MethodFisher.h"
#include "TMVA/PDF.h"
#include "TMVA/Results.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

#include "TMVA/SeparationBase.h"
#include "TMVA/MisClassificationError.h"
#include "TMVA/GiniIndex.h"
#include "TMVA/CrossEntropy.h"
#include "TMVA/QuickMVAProbEstimator.h"

#include "Riostream.h"
#include "TRandom3.h"
#include "TFile.h"
#include "TMath.h"
#include "TObjString.h"
#include "TH1F.h"
#include "TH2F.h"
#include "TGraph.h"
#include "TSpline.h"
#include "TDirectory.h"
#include "TTree.h"

#include <algorithm>
#include <iomanip>
#include <vector>
#include <cmath>


REGISTER_METHOD(Boost)

ClassImp(TMVA::MethodBoost)

////////////////////////////////////////////////////////////////////////////////

TMVA::MethodBoost::MethodBoost( const TString& jobName,
                                const TString& methodTitle,
                                DataSetInfo& theData,
                                const TString& theOption ) :
   TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption)
   , fBoostNum(0)
   , fDetailedMonitoring(kFALSE)
   , fAdaBoostBeta(0)
   , fRandomSeed(0)
   , fBaggedSampleFraction(0)
   , fBoostedMethodTitle(methodTitle)
   , fBoostedMethodOptions(theOption)
   , fMonitorBoostedMethod(kFALSE)
   , fMonitorTree(0)
   , fBoostWeight(0)
   , fMethodError(0)
   , fROC_training(0.0)
   , fOverlap_integral(0.0)
   , fMVAvalues(0)
{
   fMVAvalues = new std::vector<Float_t>;
   fDataSetManager = NULL;
   fHistoricBoolOption = kFALSE;
}

////////////////////////////////////////////////////////////////////////////////

TMVA::MethodBoost::MethodBoost( DataSetInfo& dsi,
                                const TString& theWeightFile)
   : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile)
   , fBoostNum(0)
   , fDetailedMonitoring(kFALSE)
   , fAdaBoostBeta(0)
   , fRandomSeed(0)
   , fBaggedSampleFraction(0)
   , fBoostedMethodTitle("")
   , fBoostedMethodOptions("")
   , fMonitorBoostedMethod(kFALSE)
   , fMonitorTree(0)
   , fBoostWeight(0)
   , fMethodError(0)
   , fROC_training(0.0)
   , fOverlap_integral(0.0)
   , fMVAvalues(0)
{
   fMVAvalues = new std::vector<Float_t>;
   fDataSetManager = NULL;
   fHistoricBoolOption = kFALSE;
}

////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::MethodBoost::~MethodBoost( void )
{
   fMethodWeight.clear();

   // the histograms themselves are deleted when the file is closed

   fTrainSigMVAHist.clear();
   fTrainBgdMVAHist.clear();
   fBTrainSigMVAHist.clear();
   fBTrainBgdMVAHist.clear();
   fTestSigMVAHist.clear();
   fTestBgdMVAHist.clear();

   if (fMVAvalues) {
      delete fMVAvalues;
      fMVAvalues = 0;
   }
}


////////////////////////////////////////////////////////////////////////////////
/// Boost can handle classification with 2 classes (regression with one
/// regression target is foreseen, but currently disabled)

Bool_t TMVA::MethodBoost::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   // if (type == Types::kRegression && numberTargets == 1) return kTRUE;
   return kFALSE;
}


////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::DeclareOptions()
{
   DeclareOptionRef( fBoostNum = 1, "Boost_Num",
                     "Number of times the classifier is boosted" );

   DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
                     "Write monitoring histograms for each boosted classifier" );

   DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
                     "Produce histograms for detailed boost monitoring" );

   DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
   AddPreDefVal(TString("RealAdaBoost"));
   AddPreDefVal(TString("AdaBoost"));
   AddPreDefVal(TString("Bagging"));

   DeclareOptionRef(fBaggedSampleFraction=.6,"Boost_BaggedSampleFraction","Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );

   DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
                     "The AdaBoost beta parameter that sets the effect of every boost step on the events' weights" );

   DeclareOptionRef( fTransformString = "step", "Boost_Transform",
                     "Type of transform applied to every boosted method: step, linear, log, gauss" );
   AddPreDefVal(TString("step"));
   AddPreDefVal(TString("linear"));
   AddPreDefVal(TString("log"));
   AddPreDefVal(TString("gauss"));

   DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
                     "Seed for random number generator used for bagging" );
}

////////////////////////////////////////////////////////////////////////////////
/// options that are used ONLY for the READER to ensure backward compatibility;
/// they hence have no effect (the reader only reads the training
/// options that HAD been used at the training of the .xml weight file at hand)

void TMVA::MethodBoost::DeclareCompatibilityOptions()
{
   MethodCompositeBase::DeclareCompatibilityOptions();

   DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
                     "How to set the final weight of the boosted classifiers" );
   AddPreDefVal(TString("ByError"));
   AddPreDefVal(TString("Average"));
   AddPreDefVal(TString("ByROC"));
   AddPreDefVal(TString("ByOverlap"));
   AddPreDefVal(TString("LastMethod"));

   DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
                     "Type of transform applied to every boosted method: step, linear, log, gauss" );
   AddPreDefVal(TString("step"));
   AddPreDefVal(TString("linear"));
   AddPreDefVal(TString("log"));
   AddPreDefVal(TString("gauss"));

   // the Boost_Type option (fBoostType, declared in DeclareOptions) still
   // exists, but the following two values are accepted only so that old
   // weight files can be read back; the corresponding boost types have
   // been removed:
   AddPreDefVal(TString("HighEdgeGauss"));
   AddPreDefVal(TString("HighEdgeCoPara"));

   DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
                     "Recalculate the classifier MVA Signallike cut at every boost iteration" );
}

////////////////////////////////////////////////////////////////////////////////
/// just register the string from which the boosted classifier will be created

Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
{
   fBoostedMethodName    = Types::Instance().GetMethodName( theMethod );
   fBoostedMethodTitle   = methodTitle;
   fBoostedMethodOptions = theOption;
   TString opts=theOption;
   opts.ToLower();
   // if (opts.Contains("vartransform")) Log() << kFATAL << "It is not possible to use boost in conjunction with variable transform. Please remove either Boost_Num or VarTransform from the option string"<< methodTitle<<Endl;

   return kTRUE;
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::Init()
{
}

////////////////////////////////////////////////////////////////////////////////
/// initialisation routine: create the monitoring histograms and the monitor tree

void TMVA::MethodBoost::InitHistos()
{
   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());

   results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
   results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
   results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
   if (fDetailedMonitoring){
      results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
      results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
      results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
      results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
      results->Store(new TH1F("OverlapIntegral_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
   }

   results->GetHist("ClassifierWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
   results->GetHist("ClassifierWeight")->GetYaxis()->SetTitle("Classifier Weight");
   results->GetHist("BoostWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
   results->GetHist("BoostWeight")->GetYaxis()->SetTitle("Boost Weight");
   results->GetHist("ErrorFraction")->GetXaxis()->SetTitle("Index of boosted classifier");
   results->GetHist("ErrorFraction")->GetYaxis()->SetTitle("Error Fraction");
   if (fDetailedMonitoring){
      results->GetHist("ROCIntegral_test")->GetXaxis()->SetTitle("Index of boosted classifier");
      results->GetHist("ROCIntegral_test")->GetYaxis()->SetTitle("ROC integral of single classifier");
      results->GetHist("ROCIntegralBoosted_test")->GetXaxis()->SetTitle("Number of boosts");
      results->GetHist("ROCIntegralBoosted_test")->GetYaxis()->SetTitle("ROC integral boosted");
      results->GetHist("ROCIntegral_train")->GetXaxis()->SetTitle("Index of boosted classifier");
      results->GetHist("ROCIntegral_train")->GetYaxis()->SetTitle("ROC integral of single classifier");
      results->GetHist("ROCIntegralBoosted_train")->GetXaxis()->SetTitle("Number of boosts");
      results->GetHist("ROCIntegralBoosted_train")->GetYaxis()->SetTitle("ROC integral boosted");
      results->GetHist("Overlap")->GetXaxis()->SetTitle("Index of boosted classifier");
      results->GetHist("Overlap")->GetYaxis()->SetTitle("Overlap integral");
   }

   results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
   results->GetHist("SoverBtotal")->GetYaxis()->SetTitle("S/B (boosted sample)");
   results->GetHist("SoverBtotal")->GetXaxis()->SetTitle("Index of boosted classifier");

   results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
   results->GetHist("SeparationGain")->GetYaxis()->SetTitle("SeparationGain");
   results->GetHist("SeparationGain")->GetXaxis()->SetTitle("Index of boosted classifier");


   fMonitorTree= new TTree("MonitorBoost","Boost variables");
   fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
   fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
   fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::CheckSetup()
{
   Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType << Endl;
   Log() << kDEBUG << "CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<Endl;
   Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
   Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum << Endl;
   Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
   Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
   Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
   Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
   Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
   Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx << Endl;
   if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
   if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
   Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;
}
////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::Train()
{
   TDirectory* methodDir( 0 );
   TString     dirName,dirTitle;
   Int_t       StopCounter=0;
   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());

   InitHistos();

   if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
   Data()->SetCurrentType(Types::kTraining);

   if (fMethods.size() > 0) fMethods.clear();
   fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);

   Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " with title " << fBoostedMethodTitle << " Classifiers ... patience please" << Endl;
   Timer timer( fBoostNum, GetName() );

   ResetBoostWeights();

   // clean boosted method options
   CleanBoostOptions();

   // remove transformations for individual boosting steps
   // the transformation of the main method will be rerouted to each of the boost steps
   Ssiz_t varTrafoStart=fBoostedMethodOptions.Index("~VarTransform=");
   if (varTrafoStart >0) {
      Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(":",varTrafoStart);
      if (varTrafoEnd<varTrafoStart)
         varTrafoEnd=fBoostedMethodOptions.Length();
      fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
   }

   //
   // training and boosting the classifiers
   for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
      // the first classifier shows the option string output, the rest not
      if (fCurrentMethodIdx>0) TMVA::MsgLogger::InhibitOutput();

      IMethod* method = ClassifierFactory::Instance().Create(std::string(fBoostedMethodName.Data()),
                                                             GetJobName(),
                                                             Form("%s_B%04i", fBoostedMethodTitle.Data(), fCurrentMethodIdx),
                                                             DataInfo(),
                                                             fBoostedMethodOptions);

      // suppressing the rest of the classifier output the right way
      fCurrentMethod = (dynamic_cast<MethodBase*>(method));

      if (fCurrentMethod==0) {
         Log() << kFATAL << "Oops.. it seems the booking of the " << fCurrentMethodIdx << "-th classifier somehow failed" << Endl;
         return; // this should make Coverity happy (as it fears the pointer might be used later on, not knowing that kFATAL exits)
      }

      // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
      if (fCurrentMethod->GetMethodType() == Types::kCategory) { // DSMTEST
         MethodCategory *methCat = (dynamic_cast<MethodCategory*>(fCurrentMethod)); // DSMTEST
         if (!methCat) // DSMTEST
            Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /MethodBoost" << Endl; // DSMTEST
         methCat->fDataSetManager = fDataSetManager; // DSMTEST
      } // DSMTEST

      fCurrentMethod->SetMsgType(kWARNING);
      fCurrentMethod->SetupMethod();
      fCurrentMethod->ParseOptions();
      // put SetAnalysisType here for the needs of MLP
      fCurrentMethod->SetAnalysisType( GetAnalysisType() );
      fCurrentMethod->ProcessSetup();
      fCurrentMethod->CheckSetup();

      // reroute transformationhandler
      fCurrentMethod->RerouteTransformationHandler(&(this->GetTransformationHandler()));

      // creating the directory of the classifier
      if(!IsSilentFile())
      {
         if (fMonitorBoostedMethod) {
            methodDir=GetFile()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
            if (methodDir==0) {
               methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
            }
            fCurrentMethod->SetMethodDir(methodDir);
            fCurrentMethod->BaseDir()->cd();
         }
      }

      // training
      TMVA::MethodCompositeBase::fMethods.push_back(method);
      timer.DrawProgressBar( fCurrentMethodIdx );
      if (fCurrentMethodIdx==0) MonitorBoost(Types::kBoostProcBegin,fCurrentMethodIdx);
      MonitorBoost(Types::kBeforeTraining,fCurrentMethodIdx);
      TMVA::MsgLogger::InhibitOutput(); //suppressing Logger outside the method
      if (fBoostType=="Bagging") Bagging(); // you want also to train the first classifier on a bagged sample
      SingleTrain();
      TMVA::MsgLogger::EnableOutput();

      // calculate MVA values of current method for all events in training sample
      // (used later on to get 'misclassified events' etc for the boosting)
      CalcMVAValues();

      if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();

      // get ROC integral and overlap integral for single method on
      // training sample if fMethodWeightType == "ByROC" or the user
      // wants detailed monitoring

      // boosting (reweight training sample)
      MonitorBoost(Types::kBeforeBoosting,fCurrentMethodIdx);
      SingleBoost(fCurrentMethod);

      MonitorBoost(Types::kAfterBoosting,fCurrentMethodIdx);
      results->GetHist("BoostWeight")->SetBinContent(fCurrentMethodIdx+1,fBoostWeight);
      results->GetHist("ErrorFraction")->SetBinContent(fCurrentMethodIdx+1,fMethodError);

      if (fDetailedMonitoring) {
         fROC_training = GetBoostROCIntegral(kTRUE, Types::kTraining, kTRUE);
         results->GetHist("ROCIntegral_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kTRUE, Types::kTesting));
         results->GetHist("ROCIntegralBoosted_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
         results->GetHist("ROCIntegral_train")->SetBinContent(fCurrentMethodIdx+1, fROC_training);
         results->GetHist("ROCIntegralBoosted_train")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
         results->GetHist("Overlap")->SetBinContent(fCurrentMethodIdx+1, fOverlap_integral);
      }


      fMonitorTree->Fill();

      // stop boosting if needed when error has reached 0.5
      // thought of counting a few steps, but it doesn't seem to be necessary
      Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
      if (fMethodError > 0.49999) StopCounter++;
      if (StopCounter > 0 && fBoostType != "Bagging") {
         timer.DrawProgressBar( fBoostNum );
         fBoostNum = fCurrentMethodIdx+1;
         Log() << kINFO << "Error rate has reached 0.5 ("<< fMethodError<<"), boosting process stopped at #" << fBoostNum << " classifier" << Endl;
         if (fBoostNum < 5)
            Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fAdaBoostBeta << ", try reducing it." <<Endl;
         break;
      }
   }

   // since MethodBoost does not act on a private event sample (as MethodBDT does),
   // we must not leave "boosted" event weights behind for the next classifier
   // in the factory
   ResetBoostWeights();

   Timer* timer1= new Timer( fBoostNum, GetName() );
   // normalizing the weights of the classifiers
   for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
      // performing post-boosting actions
      timer1->DrawProgressBar( fCurrentMethodIdx );

      if (fCurrentMethodIdx==fBoostNum) {
         Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime()
               << "                              " << Endl;
      }

      TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
      if (tmp) tmp->SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);
   }

   // Ensure that in case of only 1 boost the method weight equals
   // 1.0.  This avoids unexpected behaviour in case of very bad
   // classifiers which have fBoostWeight=1 or fMethodError=0.5,
   // because their weight would be set to zero.  This behaviour is
   // not ok if one boosts just one time.
   if (fMethods.size()==1) fMethodWeight[0] = 1.0;

   MonitorBoost(Types::kBoostProcEnd);

   delete timer1;
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::CleanBoostOptions()
{
   fBoostedMethodOptions=GetOptions();
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::CreateMVAHistorgrams()
{
   if (fBoostNum <=0) Log() << kFATAL << "CreateHistograms called before fBoostNum is initialized" << Endl;
   // calculating histogram boundaries and creating the histograms..
   // nrms = number of rms around the average to use for the outline (of the 0th classifier)
   Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   gTools().ComputeStat( GetEventCollection( Types::kMaxTreeType ), fMVAvalues,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );

   fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
   xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;

   // creating all the histograms
   for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
      fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) );
      fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B", fNbins, xmin, xmax ) );
      fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
      fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
      fTestSigMVAHist  .push_back( new TH1F( Form("MVA_Test_S%04i",  imtd), "MVA_Test_S", fNbins, xmin, xmax ) );
      fTestBgdMVAHist  .push_back( new TH1F( Form("MVA_Test_B%04i",  imtd), "MVA_Test_B", fNbins, xmin, xmax ) );
   }
}

////////////////////////////////////////////////////////////////////////////////
/// reset the boosted weights of all events back to 1

void TMVA::MethodBoost::ResetBoostWeights()
{
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event *ev = Data()->GetEvent(ievt);
      ev->SetBoostWeight( 1.0 );
   }
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::WriteMonitoringHistosToFile( void ) const
{
   TDirectory* dir=0;
   if (fMonitorBoostedMethod) {
      for (UInt_t imtd=0;imtd<fBoostNum;imtd++) {

         // write the histograms to the specific classifier's directory
         MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
         if (!m) continue;
         dir = m->BaseDir();
         dir->cd();
         fTrainSigMVAHist[imtd]->SetDirectory(dir);
         fTrainSigMVAHist[imtd]->Write();
         fTrainBgdMVAHist[imtd]->SetDirectory(dir);
         fTrainBgdMVAHist[imtd]->Write();
         fBTrainSigMVAHist[imtd]->SetDirectory(dir);
         fBTrainSigMVAHist[imtd]->Write();
         fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
         fBTrainBgdMVAHist[imtd]->Write();
      }
   }

   // go back to the original folder
   BaseDir()->cd();

   fMonitorTree->Write();
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::TestClassification()
{
   MethodBase::TestClassification();
   if (fMonitorBoostedMethod) {
      UInt_t nloop = fTestSigMVAHist.size();
      if (fMethods.size()<nloop) nloop = fMethods.size();
      // run over all the events and populate the test MVA histograms
      Data()->SetCurrentType(Types::kTesting);
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = GetEvent(ievt);
         Float_t w = ev->GetWeight();
         if (DataInfo().IsSignal(ev)) {
            for (UInt_t imtd=0; imtd<nloop; imtd++) {
               fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
            }
         }
         else {
            for (UInt_t imtd=0; imtd<nloop; imtd++) {
               fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
            }
         }
      }
      Data()->SetCurrentType(Types::kTraining);
   }
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::WriteEvaluationHistosToFile(Types::ETreeType treetype)
{
   MethodBase::WriteEvaluationHistosToFile(treetype);
   if (treetype==Types::kTraining) return;
   UInt_t nloop = fTestSigMVAHist.size();
   if (fMethods.size()<nloop) nloop = fMethods.size();
   if (fMonitorBoostedMethod) {
      TDirectory* dir=0;
      for (UInt_t imtd=0;imtd<nloop;imtd++) {
         // write the histograms to the specific classifier's directory
         MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
         if (!mva) continue;
         dir = mva->BaseDir();
         if (dir==0) continue;
         dir->cd();
         fTestSigMVAHist[imtd]->SetDirectory(dir);
         fTestSigMVAHist[imtd]->Write();
         fTestBgdMVAHist[imtd]->SetDirectory(dir);
         fTestBgdMVAHist[imtd]->Write();
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// process user options

void TMVA::MethodBoost::ProcessOptions()
{
}

////////////////////////////////////////////////////////////////////////////////
/// train the current single method on the training sample

void TMVA::MethodBoost::SingleTrain()
{
   Data()->SetCurrentType(Types::kTraining);
   MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
   if (meth){
      meth->SetSilentFile(IsSilentFile());
      if(IsModelPersistence()){
         TString _fFileDir= DataInfo().GetName();
         _fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
         meth->SetWeightFileDir(_fFileDir);
      }
      meth->SetModelPersistence(IsModelPersistence());
      meth->TrainMethod();
   }
}

////////////////////////////////////////////////////////////////////////////////
/// find the CUT on the individual MVA that defines an event as
/// correct or misclassified (to be used in the boosting process)
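///
/// The scan below is, in effect, a single decision-tree split on the MVA
/// output: for every bin edge of the cumulative distributions the separation
/// gain
/// \f[ \mathrm{Gain} = I(S_{tot},B_{tot})
///      - \frac{(s+b)\,I(s,b) + (S_{tot}-s+B_{tot}-b)\,I(S_{tot}-s,B_{tot}-b)}{S_{tot}+B_{tot}} \f]
/// is evaluated, with \f$ I \f$ the separation index (here the Gini index)
/// and \f$ s,b \f$ the signal and background sums below the edge; the edge
/// with the largest gain becomes the cut (this is the same expression the
/// debug block at the end of the function spells out).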

void TMVA::MethodBoost::FindMVACut(MethodBase *method)
{
   if (!method || method->GetMethodType() == Types::kDT ){ return;}

   // create fine-grained histograms of the MVA distribution
   const Int_t nBins=10001;
   Double_t minMVA=150000;
   Double_t maxMVA=-150000;
   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
      GetEvent(ievt);
      Double_t val=method->GetMvaValue();
      //Helge .. I think one could very well use fMVAValues for that ... -->to do
      if (val>maxMVA) maxMVA=val;
      if (val<minMVA) minMVA=val;
   }
   maxMVA = maxMVA+(maxMVA-minMVA)/nBins;

   Double_t sum = 0.;

   TH1D *mvaS  = new TH1D(Form("MVAS_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
   TH1D *mvaB  = new TH1D(Form("MVAB_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
   TH1D *mvaSC = new TH1D(Form("MVASC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
   TH1D *mvaBC = new TH1D(Form("MVABC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);

   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
   if (fDetailedMonitoring){
      results->Store(mvaS, Form("MVAS_%d",fCurrentMethodIdx));
      results->Store(mvaB, Form("MVAB_%d",fCurrentMethodIdx));
      results->Store(mvaSC,Form("MVASC_%d",fCurrentMethodIdx));
      results->Store(mvaBC,Form("MVABC_%d",fCurrentMethodIdx));
   }

   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {

      Double_t weight = GetEvent(ievt)->GetWeight();
      Double_t mvaVal=method->GetMvaValue();
      sum +=weight;
      if (DataInfo().IsSignal(GetEvent(ievt))){
         mvaS->Fill(mvaVal,weight);
      }else {
         mvaB->Fill(mvaVal,weight);
      }
   }
   SeparationBase *sepGain;


   // Boosting should use Misclassification not Gini Index (changed, Helge 31.5.2013)
   // WARNING! It works with Misclassification only if you fix the signal to
   // background ratio at every step.  Strangely enough, there are better results
   // (as seen with BDT) if you use the Gini Index and accept that sometimes no
   // sensible cut is found - i.e. the cut is then outside the MVA value range;
   // all events are classified as background and then, according to the Boost
   // algorithm, something is renormalised 'automatically' ... so that in the
   // next step the result is again something sensible.
   // Strange ... that THIS is supposed to be right?

   // SeparationBase *sepGain2 = new MisClassificationError();
   //sepGain = new MisClassificationError();
   sepGain = new GiniIndex();
   //sepGain = new CrossEntropy();

   Double_t sTot = mvaS->GetSum();
   Double_t bTot = mvaB->GetSum();

   mvaSC->SetBinContent(1,mvaS->GetBinContent(1));
   mvaBC->SetBinContent(1,mvaB->GetBinContent(1));
   Double_t sSel=0;
   Double_t bSel=0;
   Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
   Double_t mvaCut=mvaSC->GetBinLowEdge(1);
   Double_t sSelCut=sSel;
   Double_t bSelCut=bSel;
   // std::cout << "minMVA =" << minMVA << " maxMVA = " << maxMVA << " width = " << mvaSC->GetBinWidth(1) << std::endl;

   // for (Int_t ibin=1;ibin<=nBins;ibin++) std::cout << " cutvalues[" << ibin<<"]="<<mvaSC->GetBinLowEdge(ibin) << " " << mvaSC->GetBinCenter(ibin) << std::endl;
   Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal
   for (Int_t ibin=1;ibin<=nBins;ibin++){
      mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1));
      mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1));

      sSel=mvaSC->GetBinContent(ibin);
      bSel=mvaBC->GetBinContent(ibin);

      // if (ibin==nBins){
      //    std::cout << "Last bin s="<< sSel <<" b="<<bSel << " s="<< sTot-sSel <<" b="<<bTot-bSel << endl;
      // }

      if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
          // && (mvaSC->GetBinCenter(ibin) >0 || (fCurrentMethodIdx+1)%2 )
          ){
         separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
         // mvaCut=mvaSC->GetBinCenter(ibin);
         mvaCut=mvaSC->GetBinLowEdge(ibin+1);
         // if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1;
         if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
         else                                     mvaCutOrientation=1;
         sSelCut=sSel;
         bSelCut=bSel;
         // std::cout << "new cut at " << mvaCut << "with s="<<sTot-sSel << " b="<<bTot-bSel << std::endl;
      }
      /*
      Double_t ori;
      if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) ori=-1;
      else                                     ori=1;
      std::cout << ibin << " mvacut="<<mvaCut
                << " sTot=" << sTot
                << " bTot=" << bTot
                << " sSel=" << sSel
                << " bSel=" << bSel
                << " s/b(1)=" << sSel/bSel
                << " s/b(2)=" << (sTot-sSel)/(bTot-bSel)
                << " sepGain="<<sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
                << " sepGain2="<<sepGain2->GetSeparationGain(sSel,bSel,sTot,bTot)
                << " " <<ori
                << std::endl;
      */

   }

   if (0){
      double parentIndex=sepGain->GetSeparationIndex(sTot,bTot);
      double leftIndex  =sepGain->GetSeparationIndex(sSelCut,bSelCut);
      double rightIndex =sepGain->GetSeparationIndex(sTot-sSelCut,bTot-bSelCut);
      std::cout
         << " sTot=" << sTot
         << " bTot=" << bTot
         << " s="<<sSelCut
         << " b="<<bSelCut
         << " s2="<<(sTot-sSelCut)
         << " b2="<<(bTot-bSelCut)
         << " s/b(1)=" << sSelCut/bSelCut
         << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
         << " index before cut=" << parentIndex
         << " after: left=" << leftIndex
         << " after: right=" << rightIndex
         << " sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
         << " sepGain="<<separationGain
         << " sepGain="<<sepGain->GetSeparationGain(sSelCut,bSelCut,sTot,bTot)
         << " cut=" << mvaCut
         << " idx="<<fCurrentMethodIdx
         << " cutOrientation="<<mvaCutOrientation
         << std::endl;
   }
   method->SetSignalReferenceCut(mvaCut);
   method->SetSignalReferenceCutOrientation(mvaCutOrientation);

   results->GetHist("SeparationGain")->SetBinContent(fCurrentMethodIdx+1,separationGain);

   Log() << kDEBUG << "(old step) Setting method cut to " <<method->GetSignalReferenceCut()<< Endl;

   delete sepGain;

   if(IsSilentFile())
   {
      mvaS ->Delete();
      mvaB ->Delete();
      mvaSC->Delete();
      mvaBC->Delete();
   }
}

////////////////////////////////////////////////////////////////////////////////

Double_t TMVA::MethodBoost::SingleBoost(MethodBase* method)
{
   Double_t returnVal=-1;

   if      (fBoostType=="AdaBoost")     returnVal = this->AdaBoost (method,1);
   else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost (method,0);
   else if (fBoostType=="Bagging")      returnVal = this->Bagging  ();
   else{
      Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
   }
   fMethodWeight.push_back(returnVal);
   return returnVal;
}
////////////////////////////////////////////////////////////////////////////////
/// the standard (discrete or real) AdaBoost algorithm
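///
/// In sketch form (following the code below): discrete AdaBoost counts an
/// event as misclassified when the yes/no answer of the classifier
/// (IsSignalLike) disagrees with the true class, giving the error fraction
/// \f$ err = \sum_{wrong} w_i / \sum_i w_i \f$; real AdaBoost replaces the
/// yes/no answer by a signal probability \f$ p_i \f$, rescaled to
/// \f$ 2(p_i-0.5) \in [-1,1] \f$, and accumulates the pseudo error fraction
/// \f$ err = \sum_i w_i \, y_i \, 2(p_i-0.5) / \sum_i w_i \f$ with
/// \f$ y_i = \pm 1 \f$ the true class.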

Double_t TMVA::MethodBoost::AdaBoost(MethodBase* method, Bool_t discreteAdaBoost)
{
   if (!method) {
      Log() << kWARNING << " AdaBoost called without classifier reference - needed for calculating AdaBoost " << Endl;
      return 0;
   }

   Float_t w,v; Bool_t sig=kTRUE;
   Double_t sumAll=0, sumWrong=0;
   Bool_t* WrongDetection=new Bool_t[GetNEvents()];
   QuickMVAProbEstimator *MVAProb=NULL;

   if (discreteAdaBoost) {
      FindMVACut(method);
      Log() << kDEBUG << " individual mva cut value = " << method->GetSignalReferenceCut() << Endl;
   } else {
      MVAProb=new TMVA::QuickMVAProbEstimator();
      // RealAdaBoost does not use a simple "yes (signal)" or "no (background)"
      // answer from the single MVA, but a "signal probability" instead (in the
      // BDT case that would be the 'purity' in the leaf node).  For some MLP
      // parameters the MVA output can also be interpreted as a probability, but
      // here a general approach is used to get this probability from the MVA
      // distributions...

      for (Long64_t evt=0; evt<GetNEvents(); evt++) {
         const Event* ev = Data()->GetEvent(evt);
         MVAProb->AddEvent(fMVAvalues->at(evt),ev->GetWeight(),ev->GetClass());
      }
   }


   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;

   // finding the wrong events and calculating their total weights
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      sig=DataInfo().IsSignal(ev);
      v = fMVAvalues->at(ievt);
      w = ev->GetWeight();
      sumAll += w;
      if(!IsSilentFile())
      {
         if (fMonitorBoostedMethod) {
            if (sig) {
               fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
               fTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
            }
            else {
               fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
               fTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
            }
         }
      }

      if (discreteAdaBoost){
         if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){
            WrongDetection[ievt]=kFALSE;
         }else{
            WrongDetection[ievt]=kTRUE;
            sumWrong+=w;
         }
      }else{
         Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
         mvaProb = 2*(mvaProb-0.5);
         Int_t trueType;
         if (DataInfo().IsSignal(ev)) trueType = 1;
         else trueType = -1;
         sumWrong+= w*trueType*mvaProb;
      }
   }

   fMethodError=sumWrong/sumAll;

   // the boost weight is then calculated from fMethodError via the formula
   // w = ((1-err)/err)^beta
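   //
   // A worked example of the discrete branch below (illustration only):
   // with err = 0.25 and beta = 1, boostWeight = ln((1-0.25)/0.25) = ln 3
   // ~= 1.10, so every misclassified event is scaled by exp(1.10) ~= 3
   // before the sample is renormalised back to its previous total weight.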

   Double_t boostWeight=0;

   if (fMethodError == 0) { // no misclassification made.. perfect, no boost ;)
      Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " << Endl;
   }else{

      if (discreteAdaBoost)
         boostWeight = TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
      else
         boostWeight = TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;


      // std::cout << "boostweight = " << boostWeight << std::endl;

      // AdaBoost: rescale the weight of the wrong events according to the error
      // level over the entire training sample; then rescale all the weights to
      // keep the same sum, without touching the original weights (changing only
      // the boosted weight of the events)
      // first reweight

      Double_t newSum=0., oldSum=0.;


      Double_t boostfactor = TMath::Exp(boostWeight);


      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = Data()->GetEvent(ievt);
         oldSum += ev->GetWeight();
         if (discreteAdaBoost){
            // events are classified as Signal OR background .. right or wrong
            if (WrongDetection[ievt] && boostWeight != 0) {
               if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
               else                     ev->ScaleBoostWeight(1./boostfactor);
            }
            // if (ievt<30) std::cout<<ievt<<" var0="<<ev->GetValue(0)<<" var1="<<ev->GetValue(1)<<" weight="<<ev->GetWeight() << " boostby:"<<boostfactor<<std::endl;

         }else{
            // events are classified by their probability of being signal or
            // background (the MVA values were already calculated and stored in
            // fMVAvalues, so they are simply re-used here)
            Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
            mvaProb = 2*(mvaProb-0.5);
            // mvaProb = (1-mvaProb);

            Int_t trueType=1;
            if (DataInfo().IsSignal(ev)) trueType = 1;
            else trueType = -1;

            boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
            if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
            else                     ev->ScaleBoostWeight(1./boostfactor);

         }
         newSum += ev->GetWeight();
      }

      Double_t normWeight = oldSum/newSum;
      // next normalize the weights
      Double_t normSig=0, normBkg=0;
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = Data()->GetEvent(ievt);
         ev->ScaleBoostWeight(normWeight);
         if (ev->GetClass()) normSig+=ev->GetWeight();
         else                normBkg+=ev->GetWeight();
      }

      Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
      results->GetHist("SoverBtotal")->SetBinContent(fCurrentMethodIdx+1, normSig/normBkg);

      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = Data()->GetEvent(ievt);

         if (ev->GetClass()) ev->ScaleBoostWeight(oldSum/normSig/2);
         else                ev->ScaleBoostWeight(oldSum/normBkg/2);
      }
   }

   delete[] WrongDetection;
   if (MVAProb) delete MVAProb;

   fBoostWeight = boostWeight; // used ONLY for the monitoring tree

   return boostWeight;
}


////////////////////////////////////////////////////////////////////////////////
/// Bagging or Bootstrap boosting: gives a new random Poisson weight to every event
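///
/// For illustration: with the default Boost_BaggedSampleFraction of 0.6 each
/// event receives an integer weight drawn from a Poisson distribution with
/// mean 0.6, so a fraction exp(-0.6) ~= 55% of the events get weight 0 and
/// drop out of the bagged sample, while the expected total weight stays at
/// 60% of the original sample size.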

Double_t TMVA::MethodBoost::Bagging()
{
   TRandom3 *trandom = new TRandom3(fRandomSeed+fMethods.size());
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = Data()->GetEvent(ievt);
      ev->SetBoostWeight(trandom->PoissonD(fBaggedSampleFraction));
   }
   delete trandom;
   fBoostWeight = 1; // used ONLY for the monitoring tree
   return 1.;
}


////////////////////////////////////////////////////////////////////////////////
/// Get help message text
///
/// typical length of text line:
///         "|--------------------------------------------------------------|"

void TMVA::MethodBoost::GetHelpMessage() const
{
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "This method combines several classifiers of one species in a "<<Endl;
   Log() << "single multivariate quantity via the boost algorithm." << Endl;
   Log() << "The output is a weighted sum over all individual classifiers." <<Endl;
   Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
   Log() << "events that were misclassified in the previous tree a larger " << Endl;
   Log() << "weight in the training of the following classifier."<<Endl;
   Log() << "Optionally, Bagged boosting can also be applied." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The most important parameters in the configuration are the "<<Endl;
   Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
   Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
   Log() << "AdaBoosting: the most important parameter in this configuration" <<Endl;
   Log() << "is the beta parameter (Boost_AdaBoostBeta). " << Endl;
   Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl;
   Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
   Log() << "are available: step, linear, log, and gauss (default: step;"<<Endl;
   Log() << "'linear' applies no transform)."<<Endl;
   Log() <<Endl;
   Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl;
   Log() << "their performance by boosting them; some even slightly deteriorate"<< Endl;
   Log() << "due to the boosting." <<Endl;
   Log() << "The booking of the boost method is special since it requires"<<Endl;
   Log() << "the booking of the method to be boosted and the boost itself."<<Endl;
   Log() << "This is solved by booking the method to be boosted and adding"<<Endl;
   Log() << "all Boost parameters, which all begin with \"Boost_\", to the"<<Endl;
   Log() << "options string. The factory separates the options and initiates"<<Endl;
   Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
   Log() << "macro \"Boost.C\"." <<Endl;
}

////////////////////////////////////////////////////////////////////////////////

const TMVA::Ranking* TMVA::MethodBoost::CreateRanking()
{
   return 0;
}

////////////////////////////////////////////////////////////////////////////////
/// return boosted MVA response
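///
/// The combined response is the weighted average of the (optionally
/// transformed) individual responses,
/// \f[ y_{Boost} = \frac{\sum_i w_i \, T(y_i)}{\sum_i w_i} , \f]
/// where \f$ w_i \f$ are the method weights filled in SingleBoost and
/// \f$ T \f$ is the transform chosen via Boost_Transform (step, linear,
/// log or gauss), as implemented below.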

Double_t TMVA::MethodBoost::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   Double_t mvaValue = 0;
   Double_t norm = 0;
   Double_t epsilon = TMath::Exp(-1.);
   //Double_t fact   = TMath::Exp(-1.)+TMath::Exp(1.);
   for (UInt_t i=0;i< fMethods.size(); i++){
      MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
      if (m==0) continue;
      Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
      Double_t sigcut = m->GetSignalReferenceCut();

      // default is no transform
      if (fTransformString == "linear"){

      }
      else if (fTransformString == "log"){
         if (val < sigcut) val = sigcut;

         val = TMath::Log((val-sigcut)+epsilon);
      }
      else if (fTransformString == "step" ){
         if (m->IsSignalLike(val)) val = 1.;
         else val = -1.;
      }
      else if (fTransformString == "gauss"){
         val = TMath::Gaus((val-sigcut),1);
      }
      else {
         Log() << kFATAL << "error: unknown transformation " << fTransformString<<Endl;
      }
      mvaValue+=val*fMethodWeight[i];
      norm    +=fMethodWeight[i];
      // std::cout << "mva("<<i<<") = "<<val<<" " << valx<< " " << mvaValue<<" and sigcut="<<sigcut << std::endl;
   }
   mvaValue/=norm;
   // cannot determine error
   NoErrorCalc(err, errUpper);

   return mvaValue;
}

////////////////////////////////////////////////////////////////////////////////
/// Calculate the ROC integral of a single classifier or even the
/// whole boosted classifier.  The tree type (training or testing
/// sample) is specified by 'eTT'.
///
/// If tree type kTraining is set, the original training sample is
/// used to compute the ROC integral (original weights).
///
/// - singleMethod - if kTRUE, return ROC integral of single (last
///                  trained) classifier; if kFALSE, return ROC
///                  integral of full classifier
///
/// - eTT - tree type (Types::kTraining / Types::kTesting)
///
/// - CalcOverlapIntergral - if kTRUE, the overlap integral of the
///                          signal/background MVA distributions
///                          is calculated and stored in
///                          'fOverlap_integral'
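///
/// A typical internal use (sketch): GetBoostROCIntegral(kTRUE,
/// Types::kTraining, kTRUE) returns the ROC integral of the last trained
/// classifier on the training sample and, as a side effect, fills
/// 'fOverlap_integral' for the detailed-monitoring histograms.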

Double_t TMVA::MethodBoost::GetBoostROCIntegral(Bool_t singleMethod, Types::ETreeType eTT, Bool_t CalcOverlapIntergral)
{
   // set data sample training / testing
   Data()->SetCurrentType(eTT);

   MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0; // ToDo CoVerity flags this line as there is no protection against a zero-pointer delivered by dynamic_cast
   // to make CoVerity happy (although, OF COURSE, the last method in the committee
   // has to be also of type MethodBase as ANY method is... hence the dynamic_cast
   // will never be "zero" ...
   if (singleMethod && !method) {
      Log() << kFATAL << " What do you do? Your method:"
            << fMethods.back()->GetName()
            << " seems not to be a proper TMVA method"
            << Endl;
      std::exit(1);
   }
   Double_t err = 0.0;

   // temporarily renormalize the method weights in case of evaluation
   // of the full classifier.
   // save the old normalization of the methods
   std::vector<Double_t> OldMethodWeight(fMethodWeight);
   if (!singleMethod) {
      // calculate sum of weights of all methods
      Double_t AllMethodsWeight = 0;
      for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
         AllMethodsWeight += fMethodWeight.at(i);
      // normalize the weights of the classifiers
      if (AllMethodsWeight != 0.0) {
         for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
            fMethodWeight[i] /= AllMethodsWeight;
      }
   }

   // calculate MVA values
   Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
   std::vector <Float_t>* mvaRes;
   if (singleMethod && eTT==Types::kTraining)
      mvaRes = fMVAvalues; // values already calculated
   else {
      mvaRes = new std::vector <Float_t>(GetNEvents());
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         GetEvent(ievt);
         (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
      }
   }

   // restore the method weights
   if (!singleMethod)
      fMethodWeight = OldMethodWeight;

   // now create histograms for calculation of the ROC integral
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );

   fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
   xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;

   // calculate ROC integral
   TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
   TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
   TH1 *mva_s_overlap=0, *mva_b_overlap=0;
   if (CalcOverlapIntergral) {
      mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
      mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
   }
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
      if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
      else                         mva_b->Fill( (*mvaRes)[ievt], w );

      if (CalcOverlapIntergral) {
         Float_t w_ov = ev->GetWeight();
         if (DataInfo().IsSignal(ev))
            mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
         else
            mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
      }
   }
   gTools().NormHist( mva_s );
   gTools().NormHist( mva_b );
   PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
   PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );

   // calculate ROC integral from fS, fB
   Double_t ROC = MethodBase::GetROCIntegral(fS, fB);

   // calculate overlap integral
   if (CalcOverlapIntergral) {
      gTools().NormHist( mva_s_overlap );
      gTools().NormHist( mva_b_overlap );

      fOverlap_integral = 0.0;
      for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
         Double_t bc_s = mva_s_overlap->GetBinContent(bin);
         Double_t bc_b = mva_b_overlap->GetBinContent(bin);
         if (bc_s > 0.0 && bc_b > 0.0)
            fOverlap_integral += TMath::Min(bc_s, bc_b);
      }

      delete mva_s_overlap;
      delete mva_b_overlap;
   }

   delete mva_s;
   delete mva_b;
   delete fS;
   delete fB;
   if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;

   Data()->SetCurrentType(Types::kTraining);

   return ROC;
}

void TMVA::MethodBoost::CalcMVAValues()
{
   // Calculate MVA values of current method fMethods.back() on
   // training sample

   Data()->SetCurrentType(Types::kTraining);
   MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
   if (!method) {
      Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
      return;
   }
   // calculate MVA values
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      GetEvent(ievt);
      fMVAvalues->at(ievt) = method->GetMvaValue();
   }

   // fill cumulative mva distribution

}

////////////////////////////////////////////////////////////////////////////////
/// fill various monitoring histograms from information of the individual
/// classifiers that have been boosted.
/// Of course this depends very much on the individual classifiers; so far,
/// this monitoring is actually implemented only for Decision Trees.

void TMVA::MethodBoost::MonitorBoost( Types::EBoostStage stage, UInt_t methodIndex )
{
   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());

   if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kDT) {
      TMVA::MethodDT* currentDT=dynamic_cast<TMVA::MethodDT*>(GetCurrentMethod(methodIndex));
      if (currentDT){
         if (stage == Types::kBoostProcBegin){
            results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
            results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
         }

         if (stage == Types::kBeforeTraining){
         }
         else if (stage == Types::kBeforeBoosting){
            results->GetHist("NodesBeforePruning")->SetBinContent(methodIndex+1,currentDT->GetNNodesBeforePruning());
            results->GetHist("NodesAfterPruning")->SetBinContent(methodIndex+1,currentDT->GetNNodes());
         }
         else if (stage == Types::kAfterBoosting){

         }
         else if (stage == Types::kBoostProcEnd){
            Log() << kINFO << "<Train> average number of nodes before/after pruning : "
                  << results->GetHist("NodesBeforePruning")->GetMean() << " / "
                  << results->GetHist("NodesAfterPruning")->GetMean()
                  << Endl;
         }
      }

   }else if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kFisher) {
      if (stage == Types::kAfterBoosting){
      }
   }else{
      if (methodIndex < 3){
         Log() << kDEBUG << "No detailed boost monitoring for "
               << GetCurrentMethod(methodIndex)->GetMethodName()
               << " yet available " << Endl;
      }
   }

   // boosting plots universal for all classifiers (typically for debug purposes only, as they are not general enough)

   if (stage == Types::kBeforeBoosting){
      // if you want to display the weighted events for the 2D case at each boost step:
      if (fDetailedMonitoring){
         // the following code is useful only for 2D examples - mainly illustration for debug/educational purposes:
         if (DataInfo().GetNVariables() == 2) {
            results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),Form("EventDistSig_%d",methodIndex),100,0,7,100,0,7));
            results->GetHist(Form("EventDistSig_%d",methodIndex))->SetMarkerColor(4);
            results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),Form("EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
            results->GetHist(Form("EventDistBkg_%d",methodIndex))->SetMarkerColor(2);

            for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
               const Event* ev = GetEvent(ievt);
               Float_t w = ev->GetWeight();
               Float_t v0= ev->GetValue(0);
               Float_t v1= ev->GetValue(1);
               // if (ievt<3) std::cout<<ievt<<" var0="<<v0<<" var1="<<v1<<" weight="<<w<<std::endl;
               TH2* h;
               if (DataInfo().IsSignal(ev)) h=results->GetHist2D(Form("EventDistSig_%d",methodIndex));
               else                         h=results->GetHist2D(Form("EventDistBkg_%d",methodIndex));
               if (h) h->Fill(v0,v1,w);
            }
         }
      }
   }

   return;
}
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:366
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3126
void SetMsgType(EMsgType t)
Definition: Configurable.h:125
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
static long int sum(long int i)
Definition: Factory.cxx:2162
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
float xmin
Definition: THbookFile.cxx:93
Random number generator class based on M.
Definition: TRandom3.h:27
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:186
THist< 1, int, THistStatContent > TH1I
Definition: THist.hxx:313
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
Definition: TRandom.cxx:414
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
long long Long64_t
Definition: RtypesCore.h:69
Double_t fROC_training
Definition: MethodBoost.h:180
void SingleTrain()
initialization
Stat_t GetSum() const
Definition: TArrayD.h:46
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:136
std::vector< TH1 *> fTestSigMVAHist
Definition: MethodBoost.h:171
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Double_t Log(Double_t x)
Definition: TMath.h:649
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
float Float_t
Definition: RtypesCore.h:53
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file dummy implementation here --------------— ...
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:70
Int_t GetBoostNum()
Definition: MethodBoost.h:83
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4383
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:311
Config & gConfig()
TH1 * h
Definition: legend2.C:5
MsgLogger & Log() const
Definition: Configurable.h:122
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Bool_t fDetailedMonitoring
Definition: MethodBoost.h:152
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4639
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
EAnalysisType
Definition: Types.h:125
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodBoost.cxx:94
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:349
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6763
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:587
Basic string class.
Definition: TString.h:129
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:551
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:378
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1099
int Int_t
Definition: RtypesCore.h:41
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
Definition: TDirectory.cxx:958
bool Bool_t
Definition: RtypesCore.h:59
std::vector< TH1 *> fTrainBgdMVAHist
Definition: MethodBoost.h:166
const Ranking * CreateRanking()
#define NULL
Definition: RtypesCore.h:88
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8264
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:362
void ResetBoostWeights()
resets the boost weights of all events back to 1
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification ...
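The computation amounts to a Riemann sum over the cut scan; a sketch assuming both histograms share the same binning (hypothetical helper, not the MethodBoost member):

#include "TH1D.h"

// Area under the ROC curve from signal/background MVA-output histograms;
// 1.0 means perfect separation, 0.5 is random guessing.
Double_t RocIntegral(TH1D* sig, TH1D* bgd)
{
   const Int_t nBins = sig->GetNbinsX();
   const Double_t sTot = sig->Integral(), bTot = bgd->Integral();
   Double_t area = 0., effS = 0.;
   // Loosen the cut from the high end: effS grows, and each background
   // slice dEffB contributes a rectangle effS * dEffB to the area.
   for (Int_t i = nBins; i >= 1; --i) {
      effS += sig->GetBinContent(i) / sTot;
      area += effS * (bgd->GetBinContent(i) / bTot);
   }
   return area;
}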
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:847
void SetMethodDir(TDirectory *methodDir)
Definition: MethodBase.h:356
Double_t fOverlap_integral
Definition: MethodBoost.h:184
static void InhibitOutput()
Definition: MsgLogger.cxx:74
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
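As an illustration only (the actual method works with a separation criterion on the MVA-output distributions), a simple cut scan maximizing effS - effB could look like this:

#include "TH1D.h"

// Scan all bin edges and keep the cut with the largest effS - effB,
// assuming signal-like events lie above the cut.
Double_t FindBestCut(TH1D* sig, TH1D* bgd)
{
   const Double_t sTot = sig->Integral(), bTot = bgd->Integral();
   Double_t bestCut = 0., bestSep = -1., effS = 1., effB = 1.;
   for (Int_t i = 1; i <= sig->GetNbinsX(); ++i) {
      effS -= sig->GetBinContent(i) / sTot; // events below the cut fail
      effB -= bgd->GetBinContent(i) / bTot;
      if (effS - effB > bestSep) {
         bestSep = effS - effB;
         bestCut = sig->GetXaxis()->GetBinUpEdge(i);
      }
   }
   return bestCut;
}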
void AddEvent(Double_t val, Double_t weight, Int_t type)
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
const Event * GetEvent() const
Definition: MethodBase.h:733
std::vector< Double_t > fMethodWeight
DataSet * Data() const
Definition: MethodBase.h:393
Virtual base class for combining several TMVA methods.
virtual ~MethodBoost(void)
destructor
TString fWeightFileDir
Definition: Config.h:96
UInt_t GetClass() const
Definition: Event.h:81
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Definition: TAttMarker.h:38
Int_t GetNNodes()
Definition: MethodDT.h:97
IONames & GetIONames()
Definition: Config.h:74
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:394
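In essence (a sketch; the actual Tools::NormHist may treat bin widths differently):

#include "TH1.h"

// Rescale so that the summed bin weights equal `norm`.
void NormalizeHist(TH1* h, Double_t norm = 1.0)
{
   const Double_t sum = h->GetSumOfWeights();
   if (sum != 0) h->Scale(norm / sum);
}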
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:411
DataSetInfo & DataInfo() const
Definition: MethodBase.h:394
Class that contains all the data information.
Definition: DataSetInfo.h:60
TFile * GetFile() const
Definition: MethodBase.h:354
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Double_t GetWeight() const
return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not...
Definition: Event.cxx:382
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than its own
Definition: MethodBase.h:401
Class for boosting a TMVA method.
Definition: MethodBoost.h:56
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:134
virtual void Delete(Option_t *option="")
Delete this object.
Definition: TObject.cxx:176
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registers the string from which the boosted classifier will be created
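From the user's side, a boosted classifier is typically requested by appending Boost_* options when booking the underlying method (option values are illustrative; `factory` and `dataloader` are the usual TMVA::Factory and TMVA::DataLoader objects):

factory->BookMethod(dataloader, TMVA::Types::kFisher, "BoostedFisher",
                    "H:!V:Boost_Num=20:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2");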
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9163
TString fHistoricOption
Definition: MethodBoost.h:192
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
const char * GetName() const
Definition: MethodBase.h:318
ClassInfo * GetClassInfo(Int_t clNum) const
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Double_t fBaggedSampleFraction
Definition: MethodBoost.h:156
Implementation of the GiniIndex as separation criterion.
Definition: GiniIndex.h:63
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see the convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
Definition: TH1.cxx:8325
TString fTransformString
Definition: MethodBoost.h:151
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
Ssiz_t Length() const
Definition: TString.h:388
void ScaleBoostWeight(Double_t s) const
Definition: Event.h:107
const TString & GetJobName() const
Definition: MethodBase.h:314
const TString & GetMethodName() const
Definition: MethodBase.h:315
TAxis * GetYaxis()
Definition: TH1.h:301
Tools & gTools()
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:594
virtual TDirectory * GetDirectory(const char *apath, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory named "apath".
Bool_t IsSilentFile()
Definition: MethodBase.h:363
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:452
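Concretely, with norm = kFALSE (the default) and norm = kTRUE respectively:

f(x) = \exp\left(-\frac{(x-\mu)^2}{2\sigma^2}\right), \qquad
f_\mathrm{norm}(x) = \frac{1}{\sigma\sqrt{2\pi}}\,\exp\left(-\frac{(x-\mu)^2}{2\sigma^2}\right)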
UInt_t GetNVariables() const
Definition: MethodBase.h:329
const Bool_t kFALSE
Definition: RtypesCore.h:92
Float_t GetValue(UInt_t ivar) const
return value of the i'th variable
Definition: Event.cxx:237
Class for categorizing the phase space.
TString & Remove(Ssiz_t pos)
Definition: TString.h:621
int Ssiz_t
Definition: RtypesCore.h:63
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
TString fBoostedMethodOptions
Definition: MethodBoost.h:160
Double_t Exp(Double_t x)
Definition: TMath.h:622
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual void CheckSetup()
check; may be overridden by a derived class (sometimes, e.g., fitters are used which can only be implement...
Definition: MethodBase.cxx:438
Bool_t fMonitorBoostedMethod
Definition: MethodBoost.h:162
#define ClassImp(name)
Definition: Rtypes.h:336
void RerouteTransformationHandler(TransformationHandler *fTargetTransformation)
Definition: MethodBase.h:387
void CheckSetup()
check; may be overridden by a derived class (sometimes, e.g., fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:34
std::vector< TH1 *> fTrainSigMVAHist
Definition: MethodBoost.h:165
TString fBoostedMethodTitle
Definition: MethodBoost.h:159
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:130
void SetBoostWeight(Double_t w) const
Definition: Event.h:106
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
The TH1 histogram class.
Definition: TH1.h:56
Double_t fMethodError
Definition: MethodBoost.h:178
void AddPreDefVal(const T &)
Definition: Configurable.h:168
void GetHelpMessage() const
Get help message text.
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file; dummy implementation here ...
UInt_t GetNumber() const
Definition: ClassInfo.h:65
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:96
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
void ProcessSetup()
process all options; the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:428
void ComputeStat(const std::vector< TMVA::Event *> &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
computes statistics (mean, RMS, min, max) of the MVA output for signal and background; starts with a sanity check
Definition: Tools.cxx:214
const TString & GetOptions() const
Definition: Configurable.h:84
virtual void TestClassification()
initializes and fills the MVA output histograms for the test sample
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1660
TString fBoostedMethodName
Definition: MethodBoost.h:158
#define REGISTER_METHOD(CLASS)
registers a method with the ClassifierFactory; used as, for example, REGISTER_METHOD(Boost)
std::vector< IMethod * > fMethods
Abstract ClassifierFactory template that handles arbitrary types.
Double_t fAdaBoostBeta
Definition: MethodBoost.h:154
Double_t GetMVAProbAt(Double_t value)
TH2 * GetHist2D(const TString &alias) const
Definition: Results.cxx:139
DataSetManager * fDataSetManager
Definition: MethodBoost.h:188
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:435
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
Definition: MethodBase.cxx:601
Class that is the base-class for a vector of result.
Definition: Results.h:57
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
const Event * fTmpEvent
Definition: MethodBase.h:396
void SetWeightFileDir(TString fileDir)
set directory of weight file
Double_t GetOriginalWeight() const
Definition: Event.h:79
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:193
void InitHistos()
initialisation routine
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:344
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:310
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
Bool_t IsSignal(const Event *ev) const
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W; caution:
Definition: Timer.cxx:190
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:421
A TTree object has a header with a name and a title.
Definition: TTree.h:78
TTree * fMonitorTree
Definition: MethodBoost.h:176
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:86
virtual Int_t GetNbinsX() const
Definition: TH1.h:277
std::vector< TH1 *> fBTrainSigMVAHist
Definition: MethodBoost.h:168
static void EnableOutput()
Definition: MsgLogger.cxx:75
Double_t fBoostWeight
Definition: MethodBoost.h:177
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:155
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:317
std::vector< TH1 *> fBTrainBgdMVAHist
Definition: MethodBoost.h:169
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
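Schematically, the composite response is a method-weighted vote over the booked classifiers (a sketch with hypothetical containers, not the exact MethodCompositeBase code):

#include <cstddef>
#include <vector>

// Discrete (sign-based) combination: each classifier votes +/-1 around
// its signal reference cut, weighted by its boost weight.
double BoostedResponse(const std::vector<double>& mvaVals,
                       const std::vector<double>& methodWeights,
                       const std::vector<double>& signalCuts)
{
   double sum = 0., norm = 0.;
   for (std::size_t i = 0; i < mvaVals.size(); ++i) {
      sum  += methodWeights[i] * (mvaVals[i] > signalCuts[i] ? 1. : -1.);
      norm += methodWeights[i];
   }
   return norm > 0. ? sum / norm : 0.; // normalized vote in [-1, 1]
}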
const Bool_t kTRUE
Definition: RtypesCore.h:91
Types::EMVA GetMethodType() const
Definition: MethodBase.h:317
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
virtual void TestClassification()
initializes and fills the MVA output histograms for the test sample
const Event * GetEvent() const
Definition: DataSet.cxx:202
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:420
std::vector< TH1 *> fTestBgdMVAHist
Definition: MethodBoost.h:173
TAxis * GetXaxis()
Definition: TH1.h:300
Analysis of Boosted Decision Trees.
Definition: MethodDT.h:49
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:829
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:348
const char * Data() const
Definition: TString.h:347
Bool_t IsModelPersistence()
Definition: MethodBase.h:367