Logo ROOT  
Reference Guide
MethodBoost.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodCompositeBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Virtual base class for all MVA method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * U. of Bonn, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  **********************************************************************************/
31 
32 /*! \class TMVA::MethodBoost
33 \ingroup TMVA
34 
35 Class for boosting a TMVA method
36 
37 This class is meant to boost a single classifier. Boosting means
38 training the classifier a few times. Every time the weights of the
39 events are modified according to how well the classifier performed
40 on the test sample.
41 
42 */
43 
44 #include "TMVA/MethodBoost.h"
45 
46 #include "TMVA/ClassifierFactory.h"
47 #include "TMVA/Config.h"
48 #include "TMVA/Configurable.h"
49 #include "TMVA/DataSet.h"
50 #include "TMVA/DataSetInfo.h"
51 #include "TMVA/IMethod.h"
52 #include "TMVA/MethodBase.h"
53 #include "TMVA/MethodCategory.h"
55 #include "TMVA/MethodDT.h"
56 #include "TMVA/MethodFisher.h"
57 #include "TMVA/PDF.h"
58 #include "TMVA/Results.h"
59 #include "TMVA/Timer.h"
60 #include "TMVA/Tools.h"
61 #include "TMVA/Types.h"
62 
63 #include "TMVA/SeparationBase.h"
65 #include "TMVA/GiniIndex.h"
66 #include "TMVA/CrossEntropy.h"
69 
70 #include "TRandom3.h"
71 #include "TMath.h"
72 #include "TH1F.h"
73 #include "TH2F.h"
74 #include "TSpline.h"
75 #include "TDirectory.h"
76 #include "TTree.h"
77 
78 #include <iostream>
79 #include <algorithm>
80 #include <vector>
81 #include <cmath>
82 
83 
84 REGISTER_METHOD(Boost)
85 
87 
88 ////////////////////////////////////////////////////////////////////////////////
89 
91  const TString& methodTitle,
92  DataSetInfo& theData,
93  const TString& theOption ) :
94  TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption)
95  , fBoostNum(0)
96  , fDetailedMonitoring(kFALSE)
97  , fAdaBoostBeta(0)
98  , fRandomSeed(0)
99  , fBaggedSampleFraction(0)
100  , fBoostedMethodTitle(methodTitle)
101  , fBoostedMethodOptions(theOption)
102  , fMonitorBoostedMethod(kFALSE)
103  , fMonitorTree(0)
104  , fBoostWeight(0)
105  , fMethodError(0)
106  , fROC_training(0.0)
107  , fOverlap_integral(0.0)
108  , fMVAvalues(0)
109 {
110  fMVAvalues = new std::vector<Float_t>;
111  fDataSetManager = NULL;
113 }
114 
115 ////////////////////////////////////////////////////////////////////////////////
116 
118  const TString& theWeightFile)
119  : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile)
120  , fBoostNum(0)
121  , fDetailedMonitoring(kFALSE)
122  , fAdaBoostBeta(0)
123  , fRandomSeed(0)
124  , fBaggedSampleFraction(0)
125  , fBoostedMethodTitle("")
126  , fBoostedMethodOptions("")
127  , fMonitorBoostedMethod(kFALSE)
128  , fMonitorTree(0)
129  , fBoostWeight(0)
130  , fMethodError(0)
131  , fROC_training(0.0)
132  , fOverlap_integral(0.0)
133  , fMVAvalues(0)
134 {
135  fMVAvalues = new std::vector<Float_t>;
136  fDataSetManager = NULL;
138 }
139 
140 ////////////////////////////////////////////////////////////////////////////////
141 /// destructor
142 
144 {
145  fMethodWeight.clear();
146 
147  // the histogram themselves are deleted when the file is closed
148 
149  fTrainSigMVAHist.clear();
150  fTrainBgdMVAHist.clear();
151  fBTrainSigMVAHist.clear();
152  fBTrainBgdMVAHist.clear();
153  fTestSigMVAHist.clear();
154  fTestBgdMVAHist.clear();
155 
156  if (fMVAvalues) {
157  delete fMVAvalues;
158  fMVAvalues = 0;
159  }
160 }
161 
162 
163 ////////////////////////////////////////////////////////////////////////////////
164 /// Boost can handle classification with 2 classes and regression with one regression-target
165 
167 {
168  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
169  // if (type == Types::kRegression && numberTargets == 1) return kTRUE;
170  return kFALSE;
171 }
172 
173 
174 ////////////////////////////////////////////////////////////////////////////////
175 
177 {
178  DeclareOptionRef( fBoostNum = 1, "Boost_Num",
179  "Number of times the classifier is boosted" );
180 
181  DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
182  "Write monitoring histograms for each boosted classifier" );
183 
184  DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
185  "Produce histograms for detailed boost monitoring" );
186 
187  DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
188  AddPreDefVal(TString("RealAdaBoost"));
189  AddPreDefVal(TString("AdaBoost"));
190  AddPreDefVal(TString("Bagging"));
191 
192  DeclareOptionRef(fBaggedSampleFraction=.6,"Boost_BaggedSampleFraction","Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
193 
194  DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
195  "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
196 
197  DeclareOptionRef( fTransformString = "step", "Boost_Transform",
198  "Type of transform applied to every boosted method linear, log, step" );
199  AddPreDefVal(TString("step"));
200  AddPreDefVal(TString("linear"));
201  AddPreDefVal(TString("log"));
202  AddPreDefVal(TString("gauss"));
203 
204  DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
205  "Seed for random number generator used for bagging" );
206 
207  TMVA::MethodCompositeBase::fMethods.reserve(fBoostNum);
208 }
209 
210 ////////////////////////////////////////////////////////////////////////////////
211 /// options that are used ONLY for the READER to ensure backward compatibility
212 /// they are hence without any effect (the reader is only reading the training
213 /// options that HAD been used at the training of the .xml weight file at hand
214 
216 {
217 
219 
220  DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
221  "How to set the final weight of the boosted classifiers" );
222  AddPreDefVal(TString("ByError"));
223  AddPreDefVal(TString("Average"));
224  AddPreDefVal(TString("ByROC"));
225  AddPreDefVal(TString("ByOverlap"));
226  AddPreDefVal(TString("LastMethod"));
227 
228  DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
229  "Type of transform applied to every boosted method linear, log, step" );
230  AddPreDefVal(TString("step"));
231  AddPreDefVal(TString("linear"));
232  AddPreDefVal(TString("log"));
233  AddPreDefVal(TString("gauss"));
234 
235  // this option here
236  //DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
237  // still exists, but these two possible values
238  AddPreDefVal(TString("HighEdgeGauss"));
239  AddPreDefVal(TString("HighEdgeCoPara"));
240  // have been deleted .. hope that works :)
241 
242  DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
243  "Recalculate the classifier MVA Signallike cut at every boost iteration" );
244 
245 }
246 
247 ////////////////////////////////////////////////////////////////////////////////
248 /// just registering the string from which the boosted classifier will be created
249 
250 Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
251 {
252  fBoostedMethodName = Types::Instance().GetMethodName( theMethod );
253  fBoostedMethodTitle = methodTitle;
254  fBoostedMethodOptions = theOption;
255  TString opts=theOption;
256  opts.ToLower();
257  // if (opts.Contains("vartransform")) Log() << kFATAL << "It is not possible to use boost in conjunction with variable transform. Please remove either Boost_Num or VarTransform from the option string"<< methodTitle<<Endl;
258 
259  return kTRUE;
260 }
261 
262 ////////////////////////////////////////////////////////////////////////////////
263 
265 {
266 }
267 
268 ////////////////////////////////////////////////////////////////////////////////
269 /// initialisation routine
270 
272 {
273 
274  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
275 
276  results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
277  results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
278  results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
279  if (fDetailedMonitoring){
280  results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
281  results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
282  results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
283  results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
284  results->Store(new TH1F("OverlapIntegal_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
285  }
286 
287 
288  results->GetHist("ClassifierWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
289  results->GetHist("ClassifierWeight")->GetYaxis()->SetTitle("Classifier Weight");
290  results->GetHist("BoostWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
291  results->GetHist("BoostWeight")->GetYaxis()->SetTitle("Boost Weight");
292  results->GetHist("ErrorFraction")->GetXaxis()->SetTitle("Index of boosted classifier");
293  results->GetHist("ErrorFraction")->GetYaxis()->SetTitle("Error Fraction");
294  if (fDetailedMonitoring){
295  results->GetHist("ROCIntegral_test")->GetXaxis()->SetTitle("Index of boosted classifier");
296  results->GetHist("ROCIntegral_test")->GetYaxis()->SetTitle("ROC integral of single classifier");
297  results->GetHist("ROCIntegralBoosted_test")->GetXaxis()->SetTitle("Number of boosts");
298  results->GetHist("ROCIntegralBoosted_test")->GetYaxis()->SetTitle("ROC integral boosted");
299  results->GetHist("ROCIntegral_train")->GetXaxis()->SetTitle("Index of boosted classifier");
300  results->GetHist("ROCIntegral_train")->GetYaxis()->SetTitle("ROC integral of single classifier");
301  results->GetHist("ROCIntegralBoosted_train")->GetXaxis()->SetTitle("Number of boosts");
302  results->GetHist("ROCIntegralBoosted_train")->GetYaxis()->SetTitle("ROC integral boosted");
303  results->GetHist("Overlap")->GetXaxis()->SetTitle("Index of boosted classifier");
304  results->GetHist("Overlap")->GetYaxis()->SetTitle("Overlap integral");
305  }
306 
307  results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
308  results->GetHist("SoverBtotal")->GetYaxis()->SetTitle("S/B (boosted sample)");
309  results->GetHist("SoverBtotal")->GetXaxis()->SetTitle("Index of boosted classifier");
310 
311  results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
312  results->GetHist("SeparationGain")->GetYaxis()->SetTitle("SeparationGain");
313  results->GetHist("SeparationGain")->GetXaxis()->SetTitle("Index of boosted classifier");
314 
315 
316 
317  fMonitorTree= new TTree("MonitorBoost","Boost variables");
318  fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
319  fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
320  fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
321  fMonitorBoostedMethod = kTRUE;
322 
323 }
324 
325 
326 ////////////////////////////////////////////////////////////////////////////////
327 
329 {
330  Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType << Endl;
331  Log() << kDEBUG << "CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<Endl;
332  Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
333  Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
334  Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum << Endl;
335  Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
336  Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
337  Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
338  Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
339  Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
340  Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
341  Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
342  Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx << Endl;
343  if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
344  Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
345  if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
346  Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;
347 
348 }
349 ////////////////////////////////////////////////////////////////////////////////
350 
352 {
353  TDirectory* methodDir( 0 );
354  TString dirName,dirTitle;
355  Int_t StopCounter=0;
356  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
357 
358 
359  InitHistos();
360 
361  if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
362  Data()->SetCurrentType(Types::kTraining);
363 
364  if (fMethods.size() > 0) fMethods.clear();
365  fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);
366 
367  Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " with title " << fBoostedMethodTitle << " Classifiers ... patience please" << Endl;
368  Timer timer( fBoostNum, GetName() );
369 
370  ResetBoostWeights();
371 
372  // clean boosted method options
373  CleanBoostOptions();
374 
375 
376  // remove transformations for individual boosting steps
377  // the transformation of the main method will be rerouted to each of the boost steps
378  Ssiz_t varTrafoStart=fBoostedMethodOptions.Index("~VarTransform=");
379  if (varTrafoStart >0) {
380  Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(":",varTrafoStart);
381  if (varTrafoEnd<varTrafoStart)
382  varTrafoEnd=fBoostedMethodOptions.Length();
383  fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
384  }
385 
386  //
387  // training and boosting the classifiers
388  for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
389  // the first classifier shows the option string output, the rest not
390  if (fCurrentMethodIdx>0) TMVA::MsgLogger::InhibitOutput();
391 
393  fBoostedMethodName.Data(), GetJobName(), Form("%s_B%04i", fBoostedMethodTitle.Data(), fCurrentMethodIdx),
394  DataInfo(), fBoostedMethodOptions);
396 
397  // suppressing the rest of the classifier output the right way
398  fCurrentMethod = (dynamic_cast<MethodBase*>(method));
399 
400  if (fCurrentMethod==0) {
401  Log() << kFATAL << "uups.. guess the booking of the " << fCurrentMethodIdx << "-th classifier somehow failed" << Endl;
402  return; // hope that makes coverity happy (as if fears I might use the pointer later on, not knowing that FATAL exits
403  }
404 
405  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
406  if (fCurrentMethod->GetMethodType() == Types::kCategory) { // DSMTEST
407  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(fCurrentMethod)); // DSMTEST
408  if (!methCat) // DSMTEST
409  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" << Endl; // DSMTEST
410  methCat->fDataSetManager = fDataSetManager; // DSMTEST
411  } // DSMTEST
412 
413  fCurrentMethod->SetMsgType(kWARNING);
414  fCurrentMethod->SetupMethod();
415  fCurrentMethod->ParseOptions();
416  // put SetAnalysisType here for the needs of MLP
417  fCurrentMethod->SetAnalysisType( GetAnalysisType() );
418  fCurrentMethod->ProcessSetup();
419  fCurrentMethod->CheckSetup();
420 
421 
422  // reroute transformationhandler
423  fCurrentMethod->RerouteTransformationHandler (&(this->GetTransformationHandler()));
424 
425 
426  // creating the directory of the classifier
427  if(!IsSilentFile())
428  {
429  if (fMonitorBoostedMethod) {
430  methodDir=GetFile()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
431  if (methodDir==0) {
432  methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
433  }
434  fCurrentMethod->SetMethodDir(methodDir);
435  fCurrentMethod->BaseDir()->cd();
436  }
437  }
438 
439  // training
440  TMVA::MethodCompositeBase::fMethods.push_back(method);
441  timer.DrawProgressBar( fCurrentMethodIdx );
442  if (fCurrentMethodIdx==0) MonitorBoost(Types::kBoostProcBegin,fCurrentMethodIdx);
443  MonitorBoost(Types::kBeforeTraining,fCurrentMethodIdx);
444  TMVA::MsgLogger::InhibitOutput(); //suppressing Logger outside the method
445  if (fBoostType=="Bagging") Bagging(); // you want also to train the first classifier on a bagged sample
446  SingleTrain();
448  if(!IsSilentFile())fCurrentMethod->WriteMonitoringHistosToFile();
449 
450  // calculate MVA values of current method for all events in training sample
451  // (used later on to get 'misclassified events' etc for the boosting
452  CalcMVAValues();
453 
454  if(!IsSilentFile()) if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
455 
456  // get ROC integral and overlap integral for single method on
457  // training sample if fMethodWeightType == "ByROC" or the user
458  // wants detailed monitoring
459 
460  // boosting (reweight training sample)
461  MonitorBoost(Types::kBeforeBoosting,fCurrentMethodIdx);
462  SingleBoost(fCurrentMethod);
463 
464  MonitorBoost(Types::kAfterBoosting,fCurrentMethodIdx);
465  results->GetHist("BoostWeight")->SetBinContent(fCurrentMethodIdx+1,fBoostWeight);
466  results->GetHist("ErrorFraction")->SetBinContent(fCurrentMethodIdx+1,fMethodError);
467 
468  if (fDetailedMonitoring) {
469  fROC_training = GetBoostROCIntegral(kTRUE, Types::kTraining, kTRUE);
470  results->GetHist("ROCIntegral_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kTRUE, Types::kTesting));
471  results->GetHist("ROCIntegralBoosted_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
472  results->GetHist("ROCIntegral_train")->SetBinContent(fCurrentMethodIdx+1, fROC_training);
473  results->GetHist("ROCIntegralBoosted_train")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
474  results->GetHist("Overlap")->SetBinContent(fCurrentMethodIdx+1, fOverlap_integral);
475  }
476 
477 
478 
479  fMonitorTree->Fill();
480 
481  // stop boosting if needed when error has reached 0.5
482  // thought of counting a few steps, but it doesn't seem to be necessary
483  Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
484  if (fMethodError > 0.49999) StopCounter++;
485  if (StopCounter > 0 && fBoostType != "Bagging") {
486  timer.DrawProgressBar( fBoostNum );
487  fBoostNum = fCurrentMethodIdx+1;
488  Log() << kINFO << "Error rate has reached 0.5 ("<< fMethodError<<"), boosting process stopped at #" << fBoostNum << " classifier" << Endl;
489  if (fBoostNum < 5)
490  Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fAdaBoostBeta << ", try reducing it." <<Endl;
491  break;
492  }
493  }
494 
495  //as MethodBoost acts not on a private event sample (like MethodBDT does), we need to remember not
496  // to leave "boosted" events to the next classifier in the factory
497 
498  ResetBoostWeights();
499 
500  Timer* timer1= new Timer( fBoostNum, GetName() );
501  // normalizing the weights of the classifiers
502  for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
503  // performing post-boosting actions
504 
505  timer1->DrawProgressBar( fCurrentMethodIdx );
506 
507  if (fCurrentMethodIdx==fBoostNum) {
508  Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime()
509  << " " << Endl;
510  }
511 
512  TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
513  if (tmp) tmp->SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);
514 
515  }
516 
517  // Ensure that in case of only 1 boost the method weight equals
518  // 1.0. This avoids unexpected behaviour in case of very bad
519  // classifiers which have fBoostWeight=1 or fMethodError=0.5,
520  // because their weight would be set to zero. This behaviour is
521  // not ok if one boosts just one time.
522  if (fMethods.size()==1) fMethodWeight[0] = 1.0;
523 
524  MonitorBoost(Types::kBoostProcEnd);
525 
526  delete timer1;
527 }
528 
529 ////////////////////////////////////////////////////////////////////////////////
530 
532 {
533  fBoostedMethodOptions=GetOptions();
534 }
535 
536 ////////////////////////////////////////////////////////////////////////////////
537 
539 {
540  if (fBoostNum <=0) Log() << kFATAL << "CreateHistograms called before fBoostNum is initialized" << Endl;
541  // calculating histograms boundaries and creating histograms..
542  // nrms = number of rms around the average to use for outline (of the 0 classifier)
543  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
544  Int_t signalClass = 0;
545  if (DataInfo().GetClassInfo("Signal") != 0) {
546  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
547  }
548  gTools().ComputeStat( GetEventCollection( Types::kMaxTreeType ), fMVAvalues,
549  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
550 
552  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
553  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;
554 
555  // creating all the histograms
556  for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
557  fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) );
558  fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B", fNbins, xmin, xmax ) );
559  fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
560  fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
561  fTestSigMVAHist .push_back( new TH1F( Form("MVA_Test_S%04i", imtd), "MVA_Test_S", fNbins, xmin, xmax ) );
562  fTestBgdMVAHist .push_back( new TH1F( Form("MVA_Test_B%04i", imtd), "MVA_Test_B", fNbins, xmin, xmax ) );
563  }
564 }
565 
566 ////////////////////////////////////////////////////////////////////////////////
567 /// resetting back the boosted weights of the events to 1
568 
570 {
571  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
572  const Event *ev = Data()->GetEvent(ievt);
573  ev->SetBoostWeight( 1.0 );
574  }
575 }
576 
577 ////////////////////////////////////////////////////////////////////////////////
578 
580 {
581  TDirectory* dir=0;
582  if (fMonitorBoostedMethod) {
583  for (UInt_t imtd=0;imtd<fBoostNum;imtd++) {
584 
585  //writing the histograms in the specific classifier's directory
586  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
587  if (!m) continue;
588  dir = m->BaseDir();
589  dir->cd();
590  fTrainSigMVAHist[imtd]->SetDirectory(dir);
591  fTrainSigMVAHist[imtd]->Write();
592  fTrainBgdMVAHist[imtd]->SetDirectory(dir);
593  fTrainBgdMVAHist[imtd]->Write();
594  fBTrainSigMVAHist[imtd]->SetDirectory(dir);
595  fBTrainSigMVAHist[imtd]->Write();
596  fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
597  fBTrainBgdMVAHist[imtd]->Write();
598  }
599  }
600 
601  // going back to the original folder
602  BaseDir()->cd();
603 
604  fMonitorTree->Write();
605 }
606 
607 ////////////////////////////////////////////////////////////////////////////////
608 
610 {
612  if (fMonitorBoostedMethod) {
613  UInt_t nloop = fTestSigMVAHist.size();
614  if (fMethods.size()<nloop) nloop = fMethods.size();
615  //running over all the events and populating the test MVA histograms
616  Data()->SetCurrentType(Types::kTesting);
617  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
618  const Event* ev = GetEvent(ievt);
619  Float_t w = ev->GetWeight();
620  if (DataInfo().IsSignal(ev)) {
621  for (UInt_t imtd=0; imtd<nloop; imtd++) {
622  fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
623  }
624  }
625  else {
626  for (UInt_t imtd=0; imtd<nloop; imtd++) {
627  fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
628  }
629  }
630  }
631  Data()->SetCurrentType(Types::kTraining);
632  }
633 }
634 
635 ////////////////////////////////////////////////////////////////////////////////
636 
638 {
640  if (treetype==Types::kTraining) return;
641  UInt_t nloop = fTestSigMVAHist.size();
642  if (fMethods.size()<nloop) nloop = fMethods.size();
643  if (fMonitorBoostedMethod) {
644  TDirectory* dir=0;
645  for (UInt_t imtd=0;imtd<nloop;imtd++) {
646  //writing the histograms in the specific classifier's directory
647  MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
648  if (!mva) continue;
649  dir = mva->BaseDir();
650  if (dir==0) continue;
651  dir->cd();
652  fTestSigMVAHist[imtd]->SetDirectory(dir);
653  fTestSigMVAHist[imtd]->Write();
654  fTestBgdMVAHist[imtd]->SetDirectory(dir);
655  fTestBgdMVAHist[imtd]->Write();
656  }
657  }
658 }
659 
660 ////////////////////////////////////////////////////////////////////////////////
661 /// process user options
662 
664 {
665 }
666 
667 ////////////////////////////////////////////////////////////////////////////////
668 /// initialization
669 
671 {
672  Data()->SetCurrentType(Types::kTraining);
673  MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
674  if (meth){
675  meth->SetSilentFile(IsSilentFile());
676  if(IsModelPersistence()){
677  TString _fFileDir= DataInfo().GetName();
678  _fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
679  meth->SetWeightFileDir(_fFileDir);
680  }
681  meth->SetModelPersistence(IsModelPersistence());
682  meth->TrainMethod();
683  }
684 }
685 
686 ////////////////////////////////////////////////////////////////////////////////
687 /// find the CUT on the individual MVA that defines an event as
688 /// correct or misclassified (to be used in the boosting process)
689 
691 {
692  if (!method || method->GetMethodType() == Types::kDT ){ return;}
693 
694  // creating a fine histograms containing the error rate
695  const Int_t nBins=10001;
696  Double_t minMVA=150000;
697  Double_t maxMVA=-150000;
698  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
699  GetEvent(ievt);
700  Double_t val=method->GetMvaValue();
701  //Helge .. I think one could very well use fMVAValues for that ... -->to do
702  if (val>maxMVA) maxMVA=val;
703  if (val<minMVA) minMVA=val;
704  }
705  maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
706 
707  Double_t sum = 0.;
708 
709  TH1D *mvaS = new TH1D(Form("MVAS_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
710  TH1D *mvaB = new TH1D(Form("MVAB_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
711  TH1D *mvaSC = new TH1D(Form("MVASC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
712  TH1D *mvaBC = new TH1D(Form("MVABC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
713 
714 
715  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
716  if (fDetailedMonitoring){
717  results->Store(mvaS, Form("MVAS_%d",fCurrentMethodIdx));
718  results->Store(mvaB, Form("MVAB_%d",fCurrentMethodIdx));
719  results->Store(mvaSC,Form("MVASC_%d",fCurrentMethodIdx));
720  results->Store(mvaBC,Form("MVABC_%d",fCurrentMethodIdx));
721  }
722 
723  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
724 
725  Double_t weight = GetEvent(ievt)->GetWeight();
726  Double_t mvaVal=method->GetMvaValue();
727  sum +=weight;
728  if (DataInfo().IsSignal(GetEvent(ievt))){
729  mvaS->Fill(mvaVal,weight);
730  }else {
731  mvaB->Fill(mvaVal,weight);
732  }
733  }
734  SeparationBase *sepGain;
735 
736 
737  // Boosting should use Misclassification not Gini Index (changed, Helge 31.5.2013)
738  // WARNING! It works with Misclassification only if you fix the signal to
739  // background at every step. Strangely enough, there are better results
740  // ( as seen with BDT ) if you use Gini Index, and accept that sometimes no
741  // sensible cut is found - i.e. the cut is then outside the MVA value range,
742  // all events are classified as background and then according to the Boost
743  // algorithm something is renormed 'automatically' ... so that in the next
744  // step again the result is something sensible.
745  // Strange ... that THIS is supposed to be right?
746 
747  // SeparationBase *sepGain2 = new MisClassificationError();
748  //sepGain = new MisClassificationError();
749  sepGain = new GiniIndex();
750  //sepGain = new CrossEntropy();
751 
752  Double_t sTot = mvaS->GetSum();
753  Double_t bTot = mvaB->GetSum();
754 
755  mvaSC->SetBinContent(1,mvaS->GetBinContent(1));
756  mvaBC->SetBinContent(1,mvaB->GetBinContent(1));
757  Double_t sSel=0;
758  Double_t bSel=0;
759  Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
760  Double_t mvaCut=mvaSC->GetBinLowEdge(1);
761  Double_t sSelCut=sSel;
762  Double_t bSelCut=bSel;
763  // std::cout << "minMVA =" << minMVA << " maxMVA = " << maxMVA << " width = " << mvaSC->GetBinWidth(1) << std::endl;
764 
765  // for (Int_t ibin=1;ibin<=nBins;ibin++) std::cout << " cutvalues[" << ibin<<"]="<<mvaSC->GetBinLowEdge(ibin) << " " << mvaSC->GetBinCenter(ibin) << std::endl;
766  Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal
767  for (Int_t ibin=1;ibin<=nBins;ibin++){
768  mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1));
769  mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1));
770 
771  sSel=mvaSC->GetBinContent(ibin);
772  bSel=mvaBC->GetBinContent(ibin);
773 
774  // if (ibin==nBins){
775  // std::cout << "Last bin s="<< sSel <<" b="<<bSel << " s="<< sTot-sSel <<" b="<<bTot-bSel << endl;
776  // }
777 
778  if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
779  // && (mvaSC->GetBinCenter(ibin) >0 || (fCurrentMethodIdx+1)%2 )
780  ){
781  separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
782  // mvaCut=mvaSC->GetBinCenter(ibin);
783  mvaCut=mvaSC->GetBinLowEdge(ibin+1);
784  // if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1;
785  if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
786  else mvaCutOrientation=1;
787  sSelCut=sSel;
788  bSelCut=bSel;
789  // std::cout << "new cut at " << mvaCut << "with s="<<sTot-sSel << " b="<<bTot-bSel << std::endl;
790  }
791  /*
792  Double_t ori;
793  if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) ori=-1;
794  else ori=1;
795  std::cout << ibin << " mvacut="<<mvaCut
796  << " sTot=" << sTot
797  << " bTot=" << bTot
798  << " sSel=" << sSel
799  << " bSel=" << bSel
800  << " s/b(1)=" << sSel/bSel
801  << " s/b(2)=" << (sTot-sSel)/(bTot-bSel)
802  << " sepGain="<<sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
803  << " sepGain2="<<sepGain2->GetSeparationGain(sSel,bSel,sTot,bTot)
804  << " " <<ori
805  << std::endl;
806  */
807 
808  }
809 
810  if (0){
811  double parentIndex=sepGain->GetSeparationIndex(sTot,bTot);
812  double leftIndex =sepGain->GetSeparationIndex(sSelCut,bSelCut);
813  double rightIndex =sepGain->GetSeparationIndex(sTot-sSelCut,bTot-bSelCut);
814  std::cout
815  << " sTot=" << sTot
816  << " bTot=" << bTot
817  << " s="<<sSelCut
818  << " b="<<bSelCut
819  << " s2="<<(sTot-sSelCut)
820  << " b2="<<(bTot-bSelCut)
821  << " s/b(1)=" << sSelCut/bSelCut
822  << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
823  << " index before cut=" << parentIndex
824  << " after: left=" << leftIndex
825  << " after: right=" << rightIndex
826  << " sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
827  << " sepGain="<<separationGain
828  << " sepGain="<<sepGain->GetSeparationGain(sSelCut,bSelCut,sTot,bTot)
829  << " cut=" << mvaCut
830  << " idx="<<fCurrentMethodIdx
831  << " cutOrientation="<<mvaCutOrientation
832  << std::endl;
833  }
834  method->SetSignalReferenceCut(mvaCut);
835  method->SetSignalReferenceCutOrientation(mvaCutOrientation);
836 
837  results->GetHist("SeparationGain")->SetBinContent(fCurrentMethodIdx+1,separationGain);
838 
839 
840  Log() << kDEBUG << "(old step) Setting method cut to " <<method->GetSignalReferenceCut()<< Endl;
841 
842  if(IsSilentFile())
843  {
844  mvaS ->Delete();
845  mvaB ->Delete();
846  mvaSC->Delete();
847  mvaBC->Delete();
848  }
849 }
850 
851 ////////////////////////////////////////////////////////////////////////////////
852 
854 {
855  Double_t returnVal=-1;
856 
857 
858  if (fBoostType=="AdaBoost") returnVal = this->AdaBoost (method,1);
859  else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost (method,0);
860  else if (fBoostType=="Bagging") returnVal = this->Bagging ();
861  else{
862  Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
863  }
864  fMethodWeight.push_back(returnVal);
865  return returnVal;
866 }
867 ////////////////////////////////////////////////////////////////////////////////
868 /// the standard (discrete or real) AdaBoost algorithm
869 
871 {
872  if (!method) {
873  Log() << kWARNING << " AdaBoost called without classifier reference - needed for calculating AdaBoost " << Endl;
874  return 0;
875  }
876 
877  Float_t w,v; Bool_t sig=kTRUE;
878  Double_t sumAll=0, sumWrong=0;
879  Bool_t* WrongDetection=new Bool_t[GetNEvents()];
880  QuickMVAProbEstimator *MVAProb=NULL;
881 
882  if (discreteAdaBoost) {
883  FindMVACut(method);
884  Log() << kDEBUG << " individual mva cut value = " << method->GetSignalReferenceCut() << Endl;
885  } else {
886  MVAProb=new TMVA::QuickMVAProbEstimator();
887  // the RealAdaBoost does use a simple "yes (signal)" or "no (background)"
888  // answer from your single MVA, but a "signal probability" instead (in the BDT case,
889  // that would be the 'purity' in the leaf node. For some MLP parameter, the MVA output
890  // can also interpreted as a probability, but here I try a general approach to get this
891  // probability from the MVA distributions...
892 
893  for (Long64_t evt=0; evt<GetNEvents(); evt++) {
894  const Event* ev = Data()->GetEvent(evt);
895  MVAProb->AddEvent(fMVAvalues->at(evt),ev->GetWeight(),ev->GetClass());
896  }
897  }
898 
899 
900  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;
901 
902  // finding the wrong events and calculating their total weights
903  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
904  const Event* ev = GetEvent(ievt);
905  sig=DataInfo().IsSignal(ev);
906  v = fMVAvalues->at(ievt);
907  w = ev->GetWeight();
908  sumAll += w;
909  if(!IsSilentFile())
910  {
911  if (fMonitorBoostedMethod) {
912  if (sig) {
913  fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
914  fTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
915  }
916  else {
917  fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
918  fTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
919  }
920  }
921  }
922 
923  if (discreteAdaBoost){
924  if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){
925  WrongDetection[ievt]=kFALSE;
926  }else{
927  WrongDetection[ievt]=kTRUE;
928  sumWrong+=w;
929  }
930  }else{
931  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
932  mvaProb = 2*(mvaProb-0.5);
933  Int_t trueType;
934  if (DataInfo().IsSignal(ev)) trueType = 1;
935  else trueType = -1;
936  sumWrong+= w*trueType*mvaProb;
937  }
938  }
939 
940  fMethodError=sumWrong/sumAll;
941 
942  // calculating the fMethodError and the boostWeight out of it uses the formula
943  // w = ((1-err)/err)^beta
944 
945  Double_t boostWeight=0;
946 
947  if (fMethodError == 0) { //no misclassification made.. perfect, no boost ;)
948  Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " << Endl;
949  }else{
950 
951  if (discreteAdaBoost)
952  boostWeight = TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
953  else
954  boostWeight = TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
955 
956 
957  // std::cout << "boostweight = " << boostWeight << std::endl;
958 
959  // ADA boosting, rescaling the weight of the wrong events according to the error level
960  // over the entire test sample rescaling all the weights to have the same sum, but without
961  // touching the original weights (changing only the boosted weight of all the events)
962  // first reweight
963 
964  Double_t newSum=0., oldSum=0.;
965 
966 
967  Double_t boostfactor = TMath::Exp(boostWeight);
968 
969 
970  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
971  const Event* ev = Data()->GetEvent(ievt);
972  oldSum += ev->GetWeight();
973  if (discreteAdaBoost){
974  // events are classified as Signal OR background .. right or wrong
975  if (WrongDetection[ievt] && boostWeight != 0) {
976  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
977  else ev->ScaleBoostWeight(1./boostfactor);
978  }
979  // if (ievt<30) std::cout<<ievt<<" var0="<<ev->GetValue(0)<<" var1="<<ev->GetValue(1)<<" weight="<<ev->GetWeight() << " boostby:"<<boostfactor<<std::endl;
980 
981  }else{
982  // events are classified by their probability of being signal or background
983  // (eventually you should write this one - i.e. re-use the MVA value that were already
984  // calculated and stored.. however ,for the moment ..
985  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
986  mvaProb = 2*(mvaProb-0.5);
987  // mvaProb = (1-mvaProb);
988 
989  Int_t trueType=1;
990  if (DataInfo().IsSignal(ev)) trueType = 1;
991  else trueType = -1;
992 
993  boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
994  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
995  else ev->ScaleBoostWeight(1./boostfactor);
996 
997  }
998  newSum += ev->GetWeight();
999  }
1000 
1001  Double_t normWeight = oldSum/newSum;
1002  // next normalize the weights
1003  Double_t normSig=0, normBkg=0;
1004  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1005  const Event* ev = Data()->GetEvent(ievt);
1006  ev->ScaleBoostWeight(normWeight);
1007  if (ev->GetClass()) normSig+=ev->GetWeight();
1008  else normBkg+=ev->GetWeight();
1009  }
1010 
1011  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
1012  results->GetHist("SoverBtotal")->SetBinContent(fCurrentMethodIdx+1, normSig/normBkg);
1013 
1014  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1015  const Event* ev = Data()->GetEvent(ievt);
1016 
1017  if (ev->GetClass()) ev->ScaleBoostWeight(oldSum/normSig/2);
1018  else ev->ScaleBoostWeight(oldSum/normBkg/2);
1019  }
1020  }
1021 
1022  delete[] WrongDetection;
1023  if (MVAProb) delete MVAProb;
1024 
1025  fBoostWeight = boostWeight; // used ONLY for the monitoring tree
1026 
1027  return boostWeight;
1028 }
1029 
1030 
1031 ////////////////////////////////////////////////////////////////////////////////
1032 /// Bagging or Bootstrap boosting, gives new random poisson weight for every event
1033 
1035 {
1036  TRandom3 *trandom = new TRandom3(fRandomSeed+fMethods.size());
1037  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1038  const Event* ev = Data()->GetEvent(ievt);
1039  ev->SetBoostWeight(trandom->PoissonD(fBaggedSampleFraction));
1040  }
1041  fBoostWeight = 1; // used ONLY for the monitoring tree
1042  return 1.;
1043 }
1044 
1045 
1046 ////////////////////////////////////////////////////////////////////////////////
1047 /// Get help message text
1048 ///
1049 /// typical length of text line:
1050 /// "|--------------------------------------------------------------|"
1051 
1053 {
1054  Log() << Endl;
1055  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1056  Log() << Endl;
1057  Log() << "This method combines several classifier of one species in a "<<Endl;
1058  Log() << "single multivariate quantity via the boost algorithm." << Endl;
1059  Log() << "the output is a weighted sum over all individual classifiers" <<Endl;
1060  Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
1061  Log() << "events that were misclassified in the previous tree a larger " << Endl;
1062  Log() << "weight in the training of the following classifier."<<Endl;
1063  Log() << "Optionally, Bagged boosting can also be applied." << Endl;
1064  Log() << Endl;
1065  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1066  Log() << Endl;
1067  Log() << "The most important parameter in the configuration is the "<<Endl;
1068  Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
1069  Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
1070  Log() << "AdaBoosting: The most important parameters in this configuration" <<Endl;
1071  Log() << "is the beta parameter (Boost_AdaBoostBeta) " << Endl;
1072  Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl;
1073  Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
1074  Log() << "are available: step, log, and minmax, the default is no transform."<<Endl;
1075  Log() <<Endl;
1076  Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl;
1077  Log() << "their performance by boosting them, some even slightly deteriorate"<< Endl;
1078  Log() << "due to the boosting." <<Endl;
1079  Log() << "The booking of the boost method is special since it requires"<<Endl;
1080  Log() << "the booing of the method to be boosted and the boost itself."<<Endl;
1081  Log() << "This is solved by booking the method to be boosted and to add"<<Endl;
1082  Log() << "all Boost parameters, which all begin with \"Boost_\" to the"<<Endl;
1083  Log() << "options string. The factory separates the options and initiates"<<Endl;
1084  Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
1085  Log() << "macro \"Boost.C\"" <<Endl;
1086 }
1087 
1088 ////////////////////////////////////////////////////////////////////////////////
1089 
1091 {
1092  return 0;
1093 }
1094 
1095 ////////////////////////////////////////////////////////////////////////////////
1096 /// return boosted MVA response
1097 
1099 {
1100  Double_t mvaValue = 0;
1101  Double_t norm = 0;
1102  Double_t epsilon = TMath::Exp(-1.);
1103  //Double_t fact = TMath::Exp(-1.)+TMath::Exp(1.);
1104  for (UInt_t i=0;i< fMethods.size(); i++){
1105  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
1106  if (m==0) continue;
1107  Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
1108  Double_t sigcut = m->GetSignalReferenceCut();
1109 
1110  // default is no transform
1111  if (fTransformString == "linear"){
1112 
1113  }
1114  else if (fTransformString == "log"){
1115  if (val < sigcut) val = sigcut;
1116 
1117  val = TMath::Log((val-sigcut)+epsilon);
1118  }
1119  else if (fTransformString == "step" ){
1120  if (m->IsSignalLike(val)) val = 1.;
1121  else val = -1.;
1122  }
1123  else if (fTransformString == "gauss"){
1124  val = TMath::Gaus((val-sigcut),1);
1125  }
1126  else {
1127  Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl;
1128  }
1129  mvaValue+=val*fMethodWeight[i];
1130  norm +=fMethodWeight[i];
1131  // std::cout << "mva("<<i<<") = "<<val<<" " << valx<< " " << mvaValue<<" and sigcut="<<sigcut << std::endl;
1132  }
1133  mvaValue/=norm;
1134  // cannot determine error
1135  NoErrorCalc(err, errUpper);
1136 
1137  return mvaValue;
1138 }
1139 
1140 ////////////////////////////////////////////////////////////////////////////////
1141 /// Calculate the ROC integral of a single classifier or even the
1142 /// whole boosted classifier. The tree type (training or testing
1143 /// sample) is specified by 'eTT'.
1144 ///
1145 /// If tree type kTraining is set, the original training sample is
1146 /// used to compute the ROC integral (original weights).
1147 ///
1148 /// - singleMethod - if kTRUE, return ROC integral of single (last
1149 /// trained) classifier; if kFALSE, return ROC
1150 /// integral of full classifier
1151 ///
1152 /// - eTT - tree type (Types::kTraining / Types::kTesting)
1153 ///
1154 /// - CalcOverlapIntergral - if kTRUE, the overlap integral of the
1155 /// signal/background MVA distributions
1156 /// is calculated and stored in
1157 /// 'fOverlap_integral'
1158 
1160 {
1161  // set data sample training / testing
1162  Data()->SetCurrentType(eTT);
1163 
1164  MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0; // ToDo CoVerity flags this line as there is no protection against a zero-pointer delivered by dynamic_cast
1165  // to make CoVerity happy (although, OF COURSE, the last method in the committee
1166  // has to be also of type MethodBase as ANY method is... hence the dynamic_cast
1167  // will never by "zero" ...
1168  if (singleMethod && !method) {
1169  Log() << kFATAL << " What do you do? Your method:"
1170  << fMethods.back()->GetName()
1171  << " seems not to be a propper TMVA method"
1172  << Endl;
1173  std::exit(1);
1174  }
1175  Double_t err = 0.0;
1176 
1177  // temporary renormalize the method weights in case of evaluation
1178  // of full classifier.
1179  // save the old normalization of the methods
1180  std::vector<Double_t> OldMethodWeight(fMethodWeight);
1181  if (!singleMethod) {
1182  // calculate sum of weights of all methods
1183  Double_t AllMethodsWeight = 0;
1184  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1185  AllMethodsWeight += fMethodWeight.at(i);
1186  // normalize the weights of the classifiers
1187  if (AllMethodsWeight != 0.0) {
1188  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1189  fMethodWeight[i] /= AllMethodsWeight;
1190  }
1191  }
1192 
1193  // calculate MVA values
1194  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
1195  std::vector <Float_t>* mvaRes;
1196  if (singleMethod && eTT==Types::kTraining)
1197  mvaRes = fMVAvalues; // values already calculated
1198  else {
1199  mvaRes = new std::vector <Float_t>(GetNEvents());
1200  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1201  GetEvent(ievt);
1202  (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
1203  }
1204  }
1205 
1206  // restore the method weights
1207  if (!singleMethod)
1208  fMethodWeight = OldMethodWeight;
1209 
1210  // now create histograms for calculation of the ROC integral
1211  Int_t signalClass = 0;
1212  if (DataInfo().GetClassInfo("Signal") != 0) {
1213  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1214  }
1215  gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
1216  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1217 
1219  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
1220  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;
1221 
1222  // calculate ROC integral
1223  TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
1224  TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
1225  TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1226  if (CalcOverlapIntergral) {
1227  mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
1228  mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
1229  }
1230  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1231  const Event* ev = GetEvent(ievt);
1232  Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
1233  if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
1234  else mva_b->Fill( (*mvaRes)[ievt], w );
1235 
1236  if (CalcOverlapIntergral) {
1237  Float_t w_ov = ev->GetWeight();
1238  if (DataInfo().IsSignal(ev))
1239  mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
1240  else
1241  mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1242  }
1243  }
1244  gTools().NormHist( mva_s );
1245  gTools().NormHist( mva_b );
1246  PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
1247  PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );
1248 
1249  // calculate ROC integral from fS, fB
1250  Double_t ROC = MethodBase::GetROCIntegral(fS, fB);
1251 
1252  // calculate overlap integral
1253  if (CalcOverlapIntergral) {
1254  gTools().NormHist( mva_s_overlap );
1255  gTools().NormHist( mva_b_overlap );
1256 
1257  fOverlap_integral = 0.0;
1258  for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
1259  Double_t bc_s = mva_s_overlap->GetBinContent(bin);
1260  Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1261  if (bc_s > 0.0 && bc_b > 0.0)
1262  fOverlap_integral += TMath::Min(bc_s, bc_b);
1263  }
1264 
1265  delete mva_s_overlap;
1266  delete mva_b_overlap;
1267  }
1268 
1269  delete mva_s;
1270  delete mva_b;
1271  delete fS;
1272  delete fB;
1273  if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;
1274 
1275  Data()->SetCurrentType(Types::kTraining);
1276 
1277  return ROC;
1278 }
1279 
1281 {
1282  // Calculate MVA values of current method fMethods.back() on
1283  // training sample
1284 
1285  Data()->SetCurrentType(Types::kTraining);
1286  MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
1287  if (!method) {
1288  Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
1289  return;
1290  }
1291  // calculate MVA values
1292  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1293  GetEvent(ievt);
1294  fMVAvalues->at(ievt) = method->GetMvaValue();
1295  }
1296 
1297  // fill cumulative mva distribution
1298 
1299 
1300 }
1301 
1302 ////////////////////////////////////////////////////////////////////////////////
1303 /// fill various monitoring histograms from information of the individual classifiers that
1304 /// have been boosted.
1305 /// of course.... this depends very much on the individual classifiers, and so far, only for
1306 /// Decision Trees, this monitoring is actually implemented
1307 
1309 {
1310  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
1311 
1312  if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kDT) {
1313  TMVA::MethodDT* currentDT=dynamic_cast<TMVA::MethodDT*>(GetCurrentMethod(methodIndex));
1314  if (currentDT){
1315  if (stage == Types::kBoostProcBegin){
1316  results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
1317  results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
1318  }
1319 
1320  if (stage == Types::kBeforeTraining){
1321  }
1322  else if (stage == Types::kBeforeBoosting){
1323  results->GetHist("NodesBeforePruning")->SetBinContent(methodIndex+1,currentDT->GetNNodesBeforePruning());
1324  results->GetHist("NodesAfterPruning")->SetBinContent(methodIndex+1,currentDT->GetNNodes());
1325  }
1326  else if (stage == Types::kAfterBoosting){
1327 
1328  }
1329  else if (stage != Types::kBoostProcEnd){
1330  Log() << kINFO << "<Train> average number of nodes before/after pruning : "
1331  << results->GetHist("NodesBeforePruning")->GetMean() << " / "
1332  << results->GetHist("NodesAfterPruning")->GetMean()
1333  << Endl;
1334  }
1335  }
1336 
1337  }else if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kFisher) {
1338  if (stage == Types::kAfterBoosting){
1340  }
1341  }else{
1342  if (methodIndex < 3){
1343  Log() << kDEBUG << "No detailed boost monitoring for "
1344  << GetCurrentMethod(methodIndex)->GetMethodName()
1345  << " yet available " << Endl;
1346  }
1347  }
1348 
1349  //boosting plots universal for all classifiers 'typically for debug purposes only as they are not general enough'
1350 
1351  if (stage == Types::kBeforeBoosting){
1352  // if you want to display the weighted events for 2D case at each boost step:
1353  if (fDetailedMonitoring){
1354  // the following code is useful only for 2D examples - mainly illustration for debug/educational purposes:
1355  if (DataInfo().GetNVariables() == 2) {
1356  results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),Form("EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1357  results->GetHist(Form("EventDistSig_%d",methodIndex))->SetMarkerColor(4);
1358  results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),Form("EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1359  results->GetHist(Form("EventDistBkg_%d",methodIndex))->SetMarkerColor(2);
1360 
1361  Data()->SetCurrentType(Types::kTraining);
1362  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1363  const Event* ev = GetEvent(ievt);
1364  Float_t w = ev->GetWeight();
1365  Float_t v0= ev->GetValue(0);
1366  Float_t v1= ev->GetValue(1);
1367  // if (ievt<3) std::cout<<ievt<<" var0="<<v0<<" var1="<<v1<<" weight="<<w<<std::endl;
1368  TH2* h;
1369  if (DataInfo().IsSignal(ev)) h=results->GetHist2D(Form("EventDistSig_%d",methodIndex));
1370  else h=results->GetHist2D(Form("EventDistBkg_%d",methodIndex));
1371  if (h) h->Fill(v0,v1,w);
1372  }
1373  }
1374  }
1375  }
1376 
1377  return;
1378 }
1379 
1380 
TMVA::MethodBase::TestClassification
virtual void TestClassification()
initialization
Definition: MethodBase.cxx:1125
m
auto * m
Definition: textangle.C:8
TMVA::MethodDT
Analysis of Boosted Decision Trees.
Definition: MethodDT.h:49
TMVA::MethodBase::BaseDir
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
Definition: MethodBase.cxx:1980
TArrayD::GetSum
Stat_t GetSum() const
Definition: TArrayD.h:46
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:100
TMVA::MethodBoost::CreateMVAHistorgrams
void CreateMVAHistorgrams()
Definition: MethodBoost.cxx:538
v0
@ v0
Definition: rootcling_impl.cxx:3665
TH1F.h
TMVA::Tools::NormHist
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:395
TDirectory::mkdir
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
Definition: TDirectory.cxx:1050
TMVA::MethodBase::SetModelPersistence
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:382
TMVA::MethodBase::GetROCIntegral
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
Definition: MethodBase.cxx:2822
TDirectory.h
TMVA::Event::ScaleBoostWeight
void ScaleBoostWeight(Double_t s) const
Definition: Event.h:112
TMath::Max
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
TMVA::Types::EBoostStage
EBoostStage
Definition: Types.h:152
TH1I
1-D histogram with an int per channel (see TH1 documentation)}
Definition: TH1.h:534
TH2F
2-D histogram with a float per channel (see TH1 documentation)}
Definition: TH2.h:251
DataSetInfo.h
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
Form
char * Form(const char *fmt,...)
TMVA::Ranking
Ranking for variables in method (implementation)
Definition: Ranking.h:48
TMVA::Event::GetClass
UInt_t GetClass() const
Definition: Event.h:86
MethodFisher.h
TMVA::Event::GetOriginalWeight
Double_t GetOriginalWeight() const
Definition: Event.h:84
xmax
float xmax
Definition: THbookFile.cxx:95
TMVA::MethodBoost::InitHistos
void InitHistos()
initialisation routine
Definition: MethodBoost.cxx:271
IMethod.h
TMVA::MethodBase::SetSilentFile
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:378
sum
static uint64_t sum(uint64_t i)
Definition: Factory.cxx:2345
Long64_t
long long Long64_t
Definition: RtypesCore.h:80
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:760
TMVA::MethodBoost::CheckSetup
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBoost.cxx:328
TMVA::MethodBase::DeclareCompatibilityOptions
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:596
TMath::Gaus
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:448
TMVA::Types::kTesting
@ kTesting
Definition: Types.h:146
TMVA::SeparationBase::GetSeparationIndex
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
TTree
A TTree represents a columnar dataset.
Definition: TTree.h:79
TH1D
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:618
GiniIndex.h
TMVA::Results::GetHist
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:136
TMVA::MethodBoost::TestClassification
virtual void TestClassification()
initialization
Definition: MethodBoost.cxx:609
TRandom::PoissonD
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
Definition: TRandom.cxx:454
TMVA::MethodBase::MethodBoost
friend class MethodBoost
Definition: MethodBase.h:116
TMVA::MethodBase::TrainMethod
void TrainMethod()
Definition: MethodBase.cxx:650
Float_t
float Float_t
Definition: RtypesCore.h:57
TMath::Exp
Double_t Exp(Double_t x)
Definition: TMath.h:727
TMVA::Tools::ComputeStat
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:214
TMVA::Timer::DrawProgressBar
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:202
TH1::SetBinContent
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:9056
TMVA::MethodBoost::FindMVACut
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
Definition: MethodBoost.cxx:690
TMVA::MethodBase::IsSignalLike
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:854
MethodBase.h
TDirectory::cd
virtual Bool_t cd(const char *path=nullptr)
Change current directory to "this" directory.
Definition: TDirectory.cxx:504
TMVA::MsgLogger::InhibitOutput
static void InhibitOutput()
Definition: MsgLogger.cxx:73
TMVA::MethodBase::SetSignalReferenceCut
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:364
TTree.h
TMVA::QuickMVAProbEstimator
Definition: QuickMVAProbEstimator.h:11
TMVA::GiniIndex
Implementation of the GiniIndex as separation criterion.
Definition: GiniIndex.h:63
TString
Basic string class.
Definition: TString.h:136
TMVA::Types::GetMethodName
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:135
TMVA::MethodBoost::fHistoricBoolOption
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:195
TMVA::MethodCategory
Class for categorizing the phase space.
Definition: MethodCategory.h:58
TMVA::Types::kBoostProcBegin
@ kBoostProcBegin
Definition: Types.h:153
TMVA::MethodBoost::GetBoostROCIntegral
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Definition: MethodBoost.cxx:1159
TMVA::MethodBoost::GetHelpMessage
void GetHelpMessage() const
Get help message text.
Definition: MethodBoost.cxx:1052
v
@ v
Definition: rootcling_impl.cxx:3664
REGISTER_METHOD
#define REGISTER_METHOD(CLASS)
for example
Definition: ClassifierFactory.h:124
TMVA::MethodCompositeBase
Virtual base class for combining several TMVA method.
Definition: MethodCompositeBase.h:50
bool
TMVA::Config::VariablePlotting::fNbinsXOfROCCurve
Int_t fNbinsXOfROCCurve
Definition: Config.h:113
TMVA::MethodBase::GetMvaValue
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
PDF.h
TMVA::ClassifierFactory::Instance
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
Definition: ClassifierFactory.cxx:48
RegressionVariance.h
TMVA::Types::kAfterBoosting
@ kAfterBoosting
Definition: Types.h:156
TMVA::Event::GetValue
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:236
TSpline.h
TH1::GetMean
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:7422
TMVA::DataSetInfo
Class that contains all the data information.
Definition: DataSetInfo.h:62
TMVA::Results::Store
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:86
QuickMVAProbEstimator.h
SeparationBase.h
TMVA::Timer::GetElapsedTime
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:146
TMVA::Types::kFisher
@ kFisher
Definition: Types.h:84
TMVA::QuickMVAProbEstimator::AddEvent
void AddEvent(Double_t val, Double_t weight, Int_t type)
Definition: QuickMVAProbEstimator.cxx:14
TH1::GetBinContent
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4993
MethodBoost.h
Timer.h
TMVA::MethodBoost::ProcessOptions
void ProcessOptions()
process user options
Definition: MethodBoost.cxx:663
TMVA::SeparationBase::GetSeparationGain
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
Definition: SeparationBase.cxx:101
TMVA::MethodBoost::SingleBoost
Double_t SingleBoost(MethodBase *method)
Definition: MethodBoost.cxx:853
TH1::GetYaxis
TAxis * GetYaxis()
Definition: TH1.h:321
xmin
float xmin
Definition: THbookFile.cxx:95
TMVA::MethodBoost::ResetBoostWeights
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Definition: MethodBoost.cxx:569
TMVA::MethodBoost
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
TRandom3
Random number generator class based on M.
Definition: TRandom3.h:27
TMVA::Types::kBeforeBoosting
@ kBeforeBoosting
Definition: Types.h:155
TMVA::Types::EAnalysisType
EAnalysisType
Definition: Types.h:128
h
#define h(i)
Definition: RSha256.hxx:106
TMVA::MethodBoost::CleanBoostOptions
void CleanBoostOptions()
Definition: MethodBoost.cxx:531
TMVA::MethodDT::GetNNodes
Int_t GetNNodes()
Definition: MethodDT.h:97
TMVA::Types::ETreeType
ETreeType
Definition: Types.h:144
epsilon
REAL epsilon
Definition: triangle.c:617
TAttMarker::SetMarkerColor
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Definition: TAttMarker.h:38
TMVA::gConfig
Config & gConfig()
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:101
TMVA::MethodBase::WriteEvaluationHistosToFile
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Definition: MethodBase.cxx:2094
TMVA::PDF::kSpline2
@ kSpline2
Definition: PDF.h:70
TMVA::Types::kClassification
@ kClassification
Definition: Types.h:129
TH1::Fill
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3350
TMVA::Types::kCategory
@ kCategory
Definition: Types.h:99
TMVA::MethodCompositeBase::GetMvaValue
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
TH2
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
TRandom3.h
TMVA::MethodBoost::SingleTrain
void SingleTrain()
initialization
Definition: MethodBoost.cxx:670
TMVA::Config::IONames::fWeightFileDir
TString fWeightFileDir
Definition: Config.h:126
TDirectory::GetDirectory
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
Definition: TDirectory.cxx:407
TMVA::MethodBase::SetWeightFileDir
void SetWeightFileDir(TString fileDir)
set directory of weight file
Definition: MethodBase.cxx:2059
TMVA::MethodBoost::WriteMonitoringHistosToFile
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file dummy implementation here --------------—
Definition: MethodBoost.cxx:579
TMVA::MethodBase
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
TMVA::Types
Singleton class for Global types used by TMVA.
Definition: Types.h:73
TMVA::MethodBoost::fMVAvalues
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:191
Types.h
TNamed::SetTitle
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
Configurable.h
TMVA::MethodDT::GetNNodesBeforePruning
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:96
TMVA::Results
Class that is the base-class for a vector of result.
Definition: Results.h:57
TH2F.h
TMVA::MethodBoost::CreateRanking
const Ranking * CreateRanking()
Definition: MethodBoost.cxx:1090
TMVA::MethodBoost::MonitorBoost
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
Definition: MethodBoost.cxx:1308
TMVA::Types::kMaxTreeType
@ kMaxTreeType
Definition: Types.h:147
TMath::Min
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
TMVA::MethodBoost::DeclareOptions
void DeclareOptions()
Definition: MethodBoost.cxx:176
Config.h
unsigned int
TMVA::Timer
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TMVA::TMVAGlob::GetMethodName
void GetMethodName(TString &name, TKey *mkey)
Definition: tmvaglob.cxx:342
TMVA::IMethod
Interface for all concrete MVA method implementations.
Definition: IMethod.h:53
TMVA::PDF
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
TMVA::Tools::Color
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
TMVA::Types::kTraining
@ kTraining
Definition: Types.h:145
TH1::GetBinLowEdge
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8986
TMVA::SeparationBase
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Definition: SeparationBase.h:82
TMVA::MethodBoost::Train
void Train(void)
Definition: MethodBoost.cxx:351
TMVA::MsgLogger::EnableOutput
static void EnableOutput()
Definition: MsgLogger.cxx:74
TMVA::MethodBoost::AdaBoost
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
Definition: MethodBoost.cxx:870
v1
@ v1
Definition: rootcling_impl.cxx:3666
Double_t
double Double_t
Definition: RtypesCore.h:59
TMVA::MethodCompositeBase::fMethods
std::vector< IMethod * > fMethods
Definition: MethodCompositeBase.h:109
MethodDT.h
TMVA::Types::Instance
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:69
TMVA::Event::GetWeight
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:381
TMVA::MethodBoost::fDataSetManager
DataSetManager * fDataSetManager
Definition: MethodBoost.h:193
TMVA::kFATAL
@ kFATAL
Definition: Types.h:63
TMVA::Types::EMVA
EMVA
Definition: Types.h:78
TH1F
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
TMVA::Config::GetIONames
IONames & GetIONames()
Definition: Config.h:100
TMVA::Event
Definition: Event.h:51
TH1
TH1 is the base class of all histogram classes in ROOT.
Definition: TH1.h:58
TMVA::MethodBase::SetSignalReferenceCutOrientation
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:365
TMVA::MethodBoost::~MethodBoost
virtual ~MethodBoost(void)
destructor
Definition: MethodBoost.cxx:143
TMVA::kDEBUG
@ kDEBUG
Definition: Types.h:58
CrossEntropy.h
TMVA::MethodCategory::fDataSetManager
DataSetManager * fDataSetManager
Definition: MethodCategory.h:138
TObject::Delete
virtual void Delete(Option_t *option="")
Delete this object.
Definition: TObject.cxx:171
TMVA::MethodBase::GetMethodType
Types::EMVA GetMethodType() const
Definition: MethodBase.h:333
TDirectory
Describe directory structure in memory.
Definition: TDirectory.h:45
MethodCompositeBase.h
TMVA::Results::GetHist2D
TH2 * GetHist2D(const TString &alias) const
Definition: Results.cxx:145
MisClassificationError.h
TMVA::kINFO
@ kINFO
Definition: Types.h:60
TMVA::MethodBoost::HasAnalysisType
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
Definition: MethodBoost.cxx:166
TMVA::MethodBase::GetSignalReferenceCut
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:360
Tools.h
TMVA::Types::kBoostProcEnd
@ kBoostProcEnd
Definition: Types.h:157
ClassifierFactory.h
TMVA::Config::fVariablePlotting
class TMVA::Config::VariablePlotting fVariablePlotting
type
int type
Definition: TGX11.cxx:121
TMVA::MethodBoost::CalcMVAValues
void CalcMVAValues()
Definition: MethodBoost.cxx:1280
TString::ToLower
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1147
TH1::GetXaxis
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:320
Results.h
TMVA::gTools
Tools & gTools()
TMVA::Types::kDT
@ kDT
Definition: Types.h:89
TMVA::MethodBoost::Init
void Init()
Definition: MethodBoost.cxx:264
MethodCategory.h
TMVA::MethodBoost::DeclareCompatibilityOptions
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBoost.cxx:215
TMVA::QuickMVAProbEstimator::GetMVAProbAt
Double_t GetMVAProbAt(Double_t value)
Definition: QuickMVAProbEstimator.cxx:24
TMVA::MethodBoost::BookMethod
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
Definition: MethodBoost.cxx:250
DataSet.h
TMVA::Event::SetBoostWeight
void SetBoostWeight(Double_t w) const
Definition: Event.h:111
TMVA::Types::kBeforeTraining
@ kBeforeTraining
Definition: Types.h:154
TMVA::ClassifierFactory::Create
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
Definition: ClassifierFactory.cxx:89
TMVA::kWARNING
@ kWARNING
Definition: Types.h:61
TMVA::MethodBoost::Bagging
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Definition: MethodBoost.cxx:1034
TMath.h
TH1::GetNbinsX
virtual Int_t GetNbinsX() const
Definition: TH1.h:296
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
TMVA::MethodBoost::WriteEvaluationHistosToFile
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Definition: MethodBoost.cxx:637
int
RooFit::Timer
RooCmdArg Timer(Bool_t flag=kTRUE)
Definition: RooGlobalFunc.cxx:203