Logo ROOT   6.12/07
Reference Guide
MethodBase.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21  * *
22  * Copyright (c) 2005-2011: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  * *
32  **********************************************************************************/
33 
34 /*! \class TMVA::MethodBase
35 \ingroup TMVA
36 
37  Virtual base class for all MVA methods
38 
39  MethodBase hosts several specific evaluation methods.
40 
41  The kind of MVA that provides optimal performance in an analysis strongly
42  depends on the particular application. The evaluation factory provides a
43  number of numerical benchmark results to directly assess the performance
44  of the MVA training on the independent test sample. These are:
45 
46  - The _signal efficiency_ at three representative background efficiencies
47  (which is 1 &minus; rejection).
48  - The _significance_ of an MVA estimator, defined by the difference
49  between the MVA mean values for signal and background, divided by the
50  quadratic sum of their root mean squares.
51  - The _separation_ of an MVA _x_, defined by the integral
52  \f[
53  \frac{1}{2} \int \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx
54  \f]
55  where
56  \f$ S(x) \f$ and \f$ B(x) \f$ are the signal and background distributions,
57  respectively. The separation is zero for identical signal and background MVA
58  shapes, and it is one for disjunctive shapes.
59  - The average, \f$ \int x \mu (S(x)) dx \f$, of the signal \f$ \mu_{transform} \f$.
60  The \f$ \mu_{transform} \f$ of an MVA denotes the transformation that yields
61  a uniform background distribution. In this way, the signal distributions
62  \f$ S(x) \f$ can be directly compared among the various MVAs. The stronger
63  \f$ S(x) \f$ peaks towards one, the better is the discrimination of the MVA.
64  The \f$ \mu_{transform} \f$ is
65  [documented here](http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html).
66 
67  The MVA standard output also prints the linear correlation coefficients between
68  signal and background, which can be useful to eliminate variables that exhibit too
69  strong correlations.
70 */
71 
72 #include "TMVA/MethodBase.h"
73 
74 #include "TMVA/Config.h"
75 #include "TMVA/Configurable.h"
76 #include "TMVA/DataSetInfo.h"
77 #include "TMVA/DataSet.h"
78 #include "TMVA/Factory.h"
79 #include "TMVA/IMethod.h"
80 #include "TMVA/MsgLogger.h"
81 #include "TMVA/PDF.h"
82 #include "TMVA/Ranking.h"
83 #include "TMVA/Factory.h"
84 #include "TMVA/DataLoader.h"
85 #include "TMVA/Tools.h"
86 #include "TMVA/Results.h"
88 #include "TMVA/ResultsRegression.h"
89 #include "TMVA/ResultsMulticlass.h"
90 #include "TMVA/RootFinder.h"
91 #include "TMVA/Timer.h"
92 #include "TMVA/Tools.h"
93 #include "TMVA/TSpline1.h"
94 #include "TMVA/Types.h"
98 #include "TMVA/VariableInfo.h"
101 #include "TMVA/VariableTransform.h"
102 #include "TMVA/Version.h"
103 
104 #include "TROOT.h"
105 #include "TSystem.h"
106 #include "TObjString.h"
107 #include "TQObject.h"
108 #include "TSpline.h"
109 #include "TMatrix.h"
110 #include "TMath.h"
111 #include "TH1F.h"
112 #include "TH2F.h"
113 #include "TFile.h"
114 #include "TKey.h"
115 #include "TGraph.h"
116 #include "Riostream.h"
117 #include "TXMLEngine.h"
118 
119 #include <iomanip>
120 #include <iostream>
121 #include <fstream>
122 #include <sstream>
123 #include <cstdlib>
124 #include <algorithm>
125 #include <limits>
126 
127 
129 
130 using std::endl;
131 using std::atof;
132 
133 //const Int_t MethodBase_MaxIterations_ = 200;
135 
136 //const Int_t NBIN_HIST_PLOT = 100;
137 const Int_t NBIN_HIST_HIGH = 10000;
138 
139 #ifdef _WIN32
140 /* Disable warning C4355: 'this' : used in base member initializer list */
141 #pragma warning ( disable : 4355 )
142 #endif
143 
144 
145 #include "TGraph.h"
146 #include "TMultiGraph.h"
147 
148 ////////////////////////////////////////////////////////////////////////////////
149 /// standard constructor
150 
152 {
153  fNumGraphs = 0;
154  fIndex = 0;
155 }
156 
157 ////////////////////////////////////////////////////////////////////////////////
158 /// standard destructor
160 {
161  if (fMultiGraph){
162  delete fMultiGraph;
163  fMultiGraph = nullptr;
164  }
165  return;
166 }
167 
168 ////////////////////////////////////////////////////////////////////////////////
169 /// This function takes a list of titles and creates one TGraph per title.
170 /// It also sets up the style for every TGraph. All graphs are added to a single TMultiGraph.
171 ///
172 /// \param[in] graphTitles vector of titles
173 
174 void TMVA::IPythonInteractive::Init(std::vector<TString>& graphTitles)
175 {
176  if (fNumGraphs!=0){
177  std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
178  return;
179  }
180  Int_t color = 2;
181  for(auto& title : graphTitles){
182  fGraphs.push_back( new TGraph() );
183  fGraphs.back()->SetTitle(title);
184  fGraphs.back()->SetName(title);
185  fGraphs.back()->SetFillColor(color);
186  fGraphs.back()->SetLineColor(color);
187  fGraphs.back()->SetMarkerColor(color);
188  fMultiGraph->Add(fGraphs.back());
189  color += 2;
190  fNumGraphs += 1;
191  }
192  return;
193 }
194 
195 ////////////////////////////////////////////////////////////////////////////////
196 /// This function sets the point number to 0 for all graphs.
197 
199 {
200  for(Int_t i=0; i<fNumGraphs; i++){
201  fGraphs[i]->Set(0);
202  }
203 }
204 
205 ////////////////////////////////////////////////////////////////////////////////
206 /// This function is used only in 2 TGraph case, and it will add new data points to graphs.
207 ///
208 /// \param[in] x the x coordinate
209 /// \param[in] y1 the y coordinate for the first TGraph
210 /// \param[in] y2 the y coordinate for the second TGraph
211 
213 {
214  fGraphs[0]->Set(fIndex+1);
215  fGraphs[1]->Set(fIndex+1);
216  fGraphs[0]->SetPoint(fIndex, x, y1);
217  fGraphs[1]->SetPoint(fIndex, x, y2);
218  fIndex++;
219  return;
220 }
221 
222 ////////////////////////////////////////////////////////////////////////////////
223 /// This function can add data points to as many TGraphs as we have.
224 ///
225 /// \param[in] dat vector of data points. The dat[0] contains the x coordinate,
226 /// dat[1] contains the y coordinate for first TGraph, dat[2] for second, ...
227 
228 void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
229 {
230  for(Int_t i=0; i<fNumGraphs;i++){
231  fGraphs[i]->Set(fIndex+1);
232  fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
233  }
234  fIndex++;
235  return;
236 }
237 
238 
239 ////////////////////////////////////////////////////////////////////////////////
240 /// standard constructor
241 
243  Types::EMVA methodType,
244  const TString& methodTitle,
245  DataSetInfo& dsi,
246  const TString& theOption) :
247  IMethod(),
248  Configurable ( theOption ),
249  fTmpEvent ( 0 ),
250  fRanking ( 0 ),
251  fInputVars ( 0 ),
252  fAnalysisType ( Types::kNoAnalysisType ),
253  fRegressionReturnVal ( 0 ),
254  fMulticlassReturnVal ( 0 ),
255  fDataSetInfo ( dsi ),
256  fSignalReferenceCut ( 0.5 ),
257  fSignalReferenceCutOrientation( 1. ),
258  fVariableTransformType ( Types::kSignal ),
259  fJobName ( jobName ),
260  fMethodName ( methodTitle ),
261  fMethodType ( methodType ),
262  fTestvar ( "" ),
263  fTMVATrainingVersion ( TMVA_VERSION_CODE ),
264  fROOTTrainingVersion ( ROOT_VERSION_CODE ),
265  fConstructedFromWeightFile ( kFALSE ),
266  fBaseDir ( 0 ),
267  fMethodBaseDir ( 0 ),
268  fFile ( 0 ),
269  fSilentFile (kFALSE),
270  fModelPersistence (kTRUE),
271  fWeightFile ( "" ),
272  fEffS ( 0 ),
273  fDefaultPDF ( 0 ),
274  fMVAPdfS ( 0 ),
275  fMVAPdfB ( 0 ),
276  fSplS ( 0 ),
277  fSplB ( 0 ),
278  fSpleffBvsS ( 0 ),
279  fSplTrainS ( 0 ),
280  fSplTrainB ( 0 ),
281  fSplTrainEffBvsS ( 0 ),
282  fVarTransformString ( "None" ),
283  fTransformationPointer ( 0 ),
284  fTransformation ( dsi, methodTitle ),
285  fVerbose ( kFALSE ),
286  fVerbosityLevelString ( "Default" ),
287  fHelp ( kFALSE ),
288  fHasMVAPdfs ( kFALSE ),
289  fIgnoreNegWeightsInTraining( kFALSE ),
290  fSignalClass ( 0 ),
291  fBackgroundClass ( 0 ),
292  fSplRefS ( 0 ),
293  fSplRefB ( 0 ),
294  fSplTrainRefS ( 0 ),
295  fSplTrainRefB ( 0 ),
296  fSetupCompleted (kFALSE)
297 {
298  SetTestvarName();
300 
301 // // default extension for weight files
302 }
303 
304 ////////////////////////////////////////////////////////////////////////////////
305 /// constructor used for Testing + Application of the MVA,
306 /// only (no training), using given WeightFiles
307 
309  DataSetInfo& dsi,
310  const TString& weightFile ) :
311  IMethod(),
312  Configurable(""),
313  fTmpEvent ( 0 ),
314  fRanking ( 0 ),
315  fInputVars ( 0 ),
316  fAnalysisType ( Types::kNoAnalysisType ),
317  fRegressionReturnVal ( 0 ),
318  fMulticlassReturnVal ( 0 ),
319  fDataSetInfo ( dsi ),
320  fSignalReferenceCut ( 0.5 ),
321  fVariableTransformType ( Types::kSignal ),
322  fJobName ( "" ),
323  fMethodName ( "MethodBase" ),
324  fMethodType ( methodType ),
325  fTestvar ( "" ),
326  fTMVATrainingVersion ( 0 ),
327  fROOTTrainingVersion ( 0 ),
329  fBaseDir ( 0 ),
330  fMethodBaseDir ( 0 ),
331  fFile ( 0 ),
334  fWeightFile ( weightFile ),
335  fEffS ( 0 ),
336  fDefaultPDF ( 0 ),
337  fMVAPdfS ( 0 ),
338  fMVAPdfB ( 0 ),
339  fSplS ( 0 ),
340  fSplB ( 0 ),
341  fSpleffBvsS ( 0 ),
342  fSplTrainS ( 0 ),
343  fSplTrainB ( 0 ),
344  fSplTrainEffBvsS ( 0 ),
345  fVarTransformString ( "None" ),
347  fTransformation ( dsi, "" ),
348  fVerbose ( kFALSE ),
349  fVerbosityLevelString ( "Default" ),
350  fHelp ( kFALSE ),
351  fHasMVAPdfs ( kFALSE ),
353  fSignalClass ( 0 ),
354  fBackgroundClass ( 0 ),
355  fSplRefS ( 0 ),
356  fSplRefB ( 0 ),
357  fSplTrainRefS ( 0 ),
358  fSplTrainRefB ( 0 ),
360 {
362 // // constructor used for Testing + Application of the MVA,
363 // // only (no training), using given WeightFiles
364 }
365 
366 ////////////////////////////////////////////////////////////////////////////////
367 /// destructor
368 
370 {
371  // destructor
372  if (!fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
373 
374  // destructor
375  if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
376  if (fRanking != 0) delete fRanking;
377 
378  // PDFs
379  if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
380  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
381  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
382 
383  // Splines
384  if (fSplS) { delete fSplS; fSplS = 0; }
385  if (fSplB) { delete fSplB; fSplB = 0; }
386  if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
387  if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
388  if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
389  if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
390  if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
392 
393  for (Int_t i = 0; i < 2; i++ ) {
394  if (fEventCollections.at(i)) {
395  for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
396  it != fEventCollections.at(i)->end(); it++) {
397  delete (*it);
398  }
399  delete fEventCollections.at(i);
400  fEventCollections.at(i) = 0;
401  }
402  }
403 
406 }
407 
408 ////////////////////////////////////////////////////////////////////////////////
409 /// setup of methods
410 
412 {
413  // setup of methods
414 
415  if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
416  InitBase();
418  Init();
419  DeclareOptions();
421 }
422 
423 ////////////////////////////////////////////////////////////////////////////////
424 /// process all options
425 /// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
426 /// (sometimes, eg, fitters are used which can only be implemented during training phase)
427 
429 {
431  ProcessOptions();
432 }
433 
434 ////////////////////////////////////////////////////////////////////////////////
435 /// check may be overridden by derived class
436 /// (sometimes, eg, fitters are used which can only be implemented during training phase)
437 
439 {
441 }
442 
443 ////////////////////////////////////////////////////////////////////////////////
444 /// default initialization called by all constructors
445 
447 {
448  SetConfigDescription( "Configuration options for classifier architecture and tuning" );
449 
453 
454  fSplTrainS = 0;
455  fSplTrainB = 0;
456  fSplTrainEffBvsS = 0;
457  fMeanS = -1;
458  fMeanB = -1;
459  fRmsS = -1;
460  fRmsB = -1;
461  fXmin = DBL_MAX;
462  fXmax = -DBL_MAX;
464  fSplRefS = 0;
465  fSplRefB = 0;
466 
467  fTrainTime = -1.;
468  fTestTime = -1.;
469 
470  fRanking = 0;
471 
472  // temporary until the move to DataSet is complete
473  fInputVars = new std::vector<TString>;
474  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
475  fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
476  }
479 
480  fEventCollections.resize( 2 );
481  fEventCollections.at(0) = 0;
482  fEventCollections.at(1) = 0;
483 
484  // retrieve signal and background class index
485  if (DataInfo().GetClassInfo("Signal") != 0) {
486  fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
487  }
488  if (DataInfo().GetClassInfo("Background") != 0) {
489  fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
490  }
491 
492  SetConfigDescription( "Configuration options for MVA method" );
493  SetConfigName( TString("Method") + GetMethodTypeName() );
494 }
495 
496 ////////////////////////////////////////////////////////////////////////////////
497 /// define the options (their key words) that can be set in the option string
498 /// here the options valid for ALL MVA methods are declared.
499 ///
500 /// known options:
501 ///
502 /// - VariableTransform=None,Decorrelated,PCA to use transformed variables
503 /// instead of the original ones
504 /// - VariableTransformType=Signal,Background which decorrelation matrix to use
505 /// in the method. Only the Likelihood
506 /// Method can make proper use of independent
507 /// transformations of signal and background
508 /// - fNbinsMVAPdf = 50 Number of bins used to create a PDF of MVA
509 /// - fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
510 /// - fHasMVAPdfs create PDFs for the MVA outputs
511 /// - V for Verbose output (!V) for non-verbose output
512 /// - H for Help message
513 
515 {
516  DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
517 
518  DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
519  AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
520  AddPreDefVal( TString("Debug") );
521  AddPreDefVal( TString("Verbose") );
522  AddPreDefVal( TString("Info") );
523  AddPreDefVal( TString("Warning") );
524  AddPreDefVal( TString("Error") );
525  AddPreDefVal( TString("Fatal") );
526 
527  // If True (default): write all training results (weights) as text files only;
528  // if False: write also in ROOT format (not available for all methods - will abort if not
529  fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
530  fNormalise = kFALSE; // OBSOLETE !!!
531 
532  DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
533 
534  DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
535 
536  DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
537 
538  DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
539  "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
540 }
541 
542 ////////////////////////////////////////////////////////////////////////////////
543 /// the option string is decoded, for available options see "DeclareOptions"
544 
546 {
547  if (HasMVAPdfs()) {
548  // setting the default bin num... maybe should be static ? ==> Please no static (JS)
549  // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
550  // reading every PDF's definition and passing the option string to the next one to be read and marked
551  fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
555  fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
559  fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
563 
564  // the final marked option string is written back to the original methodbase
566  }
567 
569  DataInfo(),
571  Log() );
572 
573  if (!HasMVAPdfs()) {
574  if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
575  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
576  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
577  }
578 
579  if (fVerbose) { // overwrites other settings
580  fVerbosityLevelString = TString("Verbose");
581  Log().SetMinType( kVERBOSE );
582  }
583  else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
584  else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
585  else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
586  else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
587  else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
588  else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
589  else if (fVerbosityLevelString != "Default" ) {
590  Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
591  << fVerbosityLevelString << "' unknown." << Endl;
592  }
594 }
595 
596 ////////////////////////////////////////////////////////////////////////////////
597 /// options that are used ONLY for the READER to ensure backward compatibility
598 /// they are hence without any effect (the reader is only reading the training
599 /// options that HAD been used at the training of the .xml weight file at hand)
600 
602 {
603  DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
604  DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
605  DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
606  "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
607  AddPreDefVal( TString("Signal") );
608  AddPreDefVal( TString("Background") );
609  DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
610  // Why on earth ?? was this here? Was the verbosity level option meant to 'disappear? Not a good idea i think..
611  // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
612  // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
613  // AddPreDefVal( TString("Debug") );
614  // AddPreDefVal( TString("Verbose") );
615  // AddPreDefVal( TString("Info") );
616  // AddPreDefVal( TString("Warning") );
617  // AddPreDefVal( TString("Error") );
618  // AddPreDefVal( TString("Fatal") );
619  DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
620  DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
621 }
622 
623 
624 ////////////////////////////////////////////////////////////////////////////////
625 /// call the Optimizer with the set of parameters and ranges that
626 /// are meant to be tuned.
627 
628 std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
629 {
630  // this is just a dummy... needs to be implemented for each method
631  // individually (as long as we don't have it automatized via the
632  // configuration string
633 
634  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
635  << GetName() << Endl;
636  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently we need to set hardcoded which parameter is tuned in which ranges"<<Endl;
637 
638  std::map<TString,Double_t> tunedParameters;
639  tunedParameters.size(); // just to get rid of "unused" warning
640  return tunedParameters;
641 
642 }
643 
644 ////////////////////////////////////////////////////////////////////////////////
645 /// set the tuning parameters according to the argument
646 /// This is just a dummy .. have a look at the MethodBDT how you could
647 /// perhaps implement the same thing for the other Classifiers..
648 
649 void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
650 {
651 }
652 
653 ////////////////////////////////////////////////////////////////////////////////
654 
656 {
658  Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
659 
660  // train the MVA method
661  if (Help()) PrintHelpMessage();
662 
663  // all histograms should be created in the method's subdirectory
664  if(!IsSilentFile()) BaseDir()->cd();
665 
666  // once calculate all the transformation (e.g. the sequence of Decorr:Gauss:Decorr)
667  // needed for this classifier
669 
670  // call training of derived MVA
671  Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
672  << "Begin training" << Endl;
673  Long64_t nEvents = Data()->GetNEvents();
674  Timer traintimer( nEvents, GetName(), kTRUE );
675  Train();
676  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
677  << "\tEnd of training " << Endl;
678  SetTrainTime(traintimer.ElapsedSeconds());
679  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
680  << "Elapsed time for training with " << nEvents << " events: "
681  << traintimer.GetElapsedTime() << " " << Endl;
682 
683  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
684  << "\tCreate MVA output for ";
685 
686  // create PDFs for the signal and background MVA distributions (if required)
687  if (DoMulticlass()) {
688  Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
690  }
691  else if (!DoRegression()) {
692 
693  Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
695  if (HasMVAPdfs()) {
696  CreateMVAPdfs();
698  }
699 
700  } else {
701 
702  Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
704 
705  if (HasMVAPdfs() ) {
706  Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
707  CreateMVAPdfs();
708  }
709  }
710 
711  // write the current MVA state into stream
712  // produced are one text file and one ROOT file
714 
715  // produce standalone make class (presently only supported for classification)
716  if ((!DoRegression()) && (fModelPersistence)) MakeClass();
717 
718  // write additional monitoring histograms to main target file (not the weight file)
719  // again, make sure the histograms go into the method's subdirectory
720  if(!IsSilentFile())
721  {
722  BaseDir()->cd();
724  }
725 }
726 
727 ////////////////////////////////////////////////////////////////////////////////
728 
730 {
731  if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
732  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
734  bool truncate = false;
735  TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
736  stddev = sqrt(h1->GetMean());
737  truncate = true;
738  Double_t yq[1], xq[]={0.9};
739  h1->GetQuantiles(1,yq,xq);
740  TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
741  stddev90Percent = sqrt(h2->GetMean());
742  delete h1;
743  delete h2;
744 }
745 
746 ////////////////////////////////////////////////////////////////////////////////
747 /// prepare tree branch with the method's discriminating variable
748 
750 {
751  Data()->SetCurrentType(type);
752 
753  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
754 
756 
757  Long64_t nEvents = Data()->GetNEvents();
758 
759  // use timer
760  Timer timer( nEvents, GetName(), kTRUE );
761  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
762  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
763 
764  regRes->Resize( nEvents );
765  for (Int_t ievt=0; ievt<nEvents; ievt++) {
766  Data()->SetCurrentEvent(ievt);
767  std::vector< Float_t > vals = GetRegressionValues();
768  regRes->SetValue( vals, ievt );
769  timer.DrawProgressBar( ievt );
770  }
771 
772  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
773  << "Elapsed time for evaluation of " << nEvents << " events: "
774  << timer.GetElapsedTime() << " " << Endl;
775 
776  // store time used for testing
777  if (type==Types::kTesting)
778  SetTestTime(timer.ElapsedSeconds());
779 
780  TString histNamePrefix(GetTestvarName());
781  histNamePrefix += (type==Types::kTraining?"train":"test");
782  regRes->CreateDeviationHistograms( histNamePrefix );
783 }
784 
785 ////////////////////////////////////////////////////////////////////////////////
786 /// prepare tree branch with the method's discriminating variable
787 
789 {
790  Data()->SetCurrentType(type);
791 
792  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
793 
794  ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
795  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
796 
797  Long64_t nEvents = Data()->GetNEvents();
798 
799  // use timer
800  Timer timer( nEvents, GetName(), kTRUE );
801 
802  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
803  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
804 
805  resMulticlass->Resize( nEvents );
806  for (Int_t ievt=0; ievt<nEvents; ievt++) {
807  Data()->SetCurrentEvent(ievt);
808  std::vector< Float_t > vals = GetMulticlassValues();
809  resMulticlass->SetValue( vals, ievt );
810  timer.DrawProgressBar( ievt );
811  }
812 
813  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
814  << "Elapsed time for evaluation of " << nEvents << " events: "
815  << timer.GetElapsedTime() << " " << Endl;
816 
817  // store time used for testing
818  if (type==Types::kTesting)
819  SetTestTime(timer.ElapsedSeconds());
820 
821  TString histNamePrefix(GetTestvarName());
822  histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
823  resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
824  resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
825 }
826 
827 ////////////////////////////////////////////////////////////////////////////////
828 
829 void TMVA::MethodBase::NoErrorCalc(Double_t* const err, Double_t* const errUpper) {
830  if (err) *err=-1;
831  if (errUpper) *errUpper=-1;
832 }
833 
834 ////////////////////////////////////////////////////////////////////////////////
835 
836 Double_t TMVA::MethodBase::GetMvaValue( const Event* const ev, Double_t* err, Double_t* errUpper ) {
837  fTmpEvent = ev;
838  Double_t val = GetMvaValue(err, errUpper);
839  fTmpEvent = 0;
840  return val;
841 }
842 
843 ////////////////////////////////////////////////////////////////////////////////
844 /// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
845 /// for a quick determination if an event would be selected as signal or background
846 
849 }
850 ////////////////////////////////////////////////////////////////////////////////
851 /// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
852 /// for a quick determination if an event with this mva output value would be selected as signal or background
853 
856 }
857 
858 ////////////////////////////////////////////////////////////////////////////////
859 /// prepare tree branch with the method's discriminating variable
860 
862 {
863  Data()->SetCurrentType(type);
864 
865  ResultsClassification* clRes =
867 
868  Long64_t nEvents = Data()->GetNEvents();
869  clRes->Resize( nEvents );
870 
871  // use timer
872  Timer timer( nEvents, GetName(), kTRUE );
873  std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);
874 
875  // store time used for testing
876  if (type==Types::kTesting)
877  SetTestTime(timer.ElapsedSeconds());
878 
879  // load mva values to results object
880  for (Int_t ievt=0; ievt<nEvents; ievt++) {
881  clRes->SetValue( mvaValues[ievt], ievt );
882  }
883 }
884 
885 ////////////////////////////////////////////////////////////////////////////////
886 /// get all the MVA values for the events of the current Data type
887 std::vector<Double_t> TMVA::MethodBase::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
888 {
889 
890  Long64_t nEvents = Data()->GetNEvents();
891  if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
892  if (firstEvt < 0) firstEvt = 0;
893  std::vector<Double_t> values(lastEvt-firstEvt);
894  // log in case of looping on all the events
895  nEvents = values.size();
896 
897  // use timer
898  Timer timer( nEvents, GetName(), kTRUE );
899 
900  if (logProgress)
901  Log() << kHEADER<<Form("[%s] : ",DataInfo().GetName())<< "Evaluation of " << GetMethodName() << " on "
902  << (Data()->GetCurrentType()==Types::kTraining?"training":"testing") << " sample (" << nEvents << " events)" << Endl;
903 
904 
905  for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
906  Data()->SetCurrentEvent(ievt);
907  values[ievt] = GetMvaValue();
908 
909  // print progress
910  if (logProgress) {
911  Int_t modulo = Int_t(nEvents/100);
912  if (modulo <= 0 ) modulo = 1;
913  if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
914  }
915  }
916  if (logProgress) {
917  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
918  << "Elapsed time for evaluation of " << nEvents << " events: "
919  << timer.GetElapsedTime() << " " << Endl;
920  }
921 
922  return values;
923 }
924 
925 ////////////////////////////////////////////////////////////////////////////////
926 /// prepare tree branch with the method's discriminating variable
927 
929 {
930  Data()->SetCurrentType(type);
931 
932  ResultsClassification* mvaProb =
934 
935  Long64_t nEvents = Data()->GetNEvents();
936 
937  // use timer
938  Timer timer( nEvents, GetName(), kTRUE );
939 
940  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
941  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
942 
943  mvaProb->Resize( nEvents );
944  for (Int_t ievt=0; ievt<nEvents; ievt++) {
945 
946  Data()->SetCurrentEvent(ievt);
947  Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
948  if (proba < 0) break;
949  mvaProb->SetValue( proba, ievt );
950 
951  // print progress
952  Int_t modulo = Int_t(nEvents/100);
953  if (modulo <= 0 ) modulo = 1;
954  if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
955  }
956 
957  Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
958  << "Elapsed time for evaluation of " << nEvents << " events: "
959  << timer.GetElapsedTime() << " " << Endl;
960 }
961 
962 ////////////////////////////////////////////////////////////////////////////////
963 /// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
964 ///
965 /// - bias = average deviation
966 /// - dev = average absolute deviation
967 /// - rms = rms of deviation
968 
970  Double_t& dev, Double_t& devT,
971  Double_t& rms, Double_t& rmsT,
972  Double_t& mInf, Double_t& mInfT,
973  Double_t& corr,
975 {
976  Types::ETreeType savedType = Data()->GetCurrentType();
977  Data()->SetCurrentType(type);
978 
979  bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
980  Double_t sumw = 0;
981  Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
982  const Int_t nevt = GetNEvents();
983  Float_t* rV = new Float_t[nevt];
984  Float_t* tV = new Float_t[nevt];
985  Float_t* wV = new Float_t[nevt];
986  Float_t xmin = 1e30, xmax = -1e30;
987  for (Long64_t ievt=0; ievt<nevt; ievt++) {
988 
989  const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
990  Float_t t = ev->GetTarget(0);
991  Float_t w = ev->GetWeight();
993  Float_t d = (r-t);
994 
995  // find min/max
996  xmin = TMath::Min(xmin, TMath::Min(t, r));
997  xmax = TMath::Max(xmax, TMath::Max(t, r));
998 
999  // store for truncated RMS computation
1000  rV[ievt] = r;
1001  tV[ievt] = t;
1002  wV[ievt] = w;
1003 
1004  // compute deviation-squared
1005  sumw += w;
1006  bias += w * d;
1007  dev += w * TMath::Abs(d);
1008  rms += w * d * d;
1009 
1010  // compute correlation between target and regression estimate
1011  m1 += t*w; s1 += t*t*w;
1012  m2 += r*w; s2 += r*r*w;
1013  s12 += t*r;
1014  }
1015 
1016  // standard quantities
1017  bias /= sumw;
1018  dev /= sumw;
1019  rms /= sumw;
1020  rms = TMath::Sqrt(rms - bias*bias);
1021 
1022  // correlation
1023  m1 /= sumw;
1024  m2 /= sumw;
1025  corr = s12/sumw - m1*m2;
1026  corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1027 
1028  // create histogram required for computation of mutual information
1029  TH2F* hist = new TH2F( "hist", "hist", 150, xmin, xmax, 100, xmin, xmax );
1030  TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );
1031 
1032  // compute truncated RMS and fill histogram
1033  Double_t devMax = bias + 2*rms;
1034  Double_t devMin = bias - 2*rms;
1035  sumw = 0;
1036  int ic=0;
1037  for (Long64_t ievt=0; ievt<nevt; ievt++) {
1038  Float_t d = (rV[ievt] - tV[ievt]);
1039  hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
1040  if (d >= devMin && d <= devMax) {
1041  sumw += wV[ievt];
1042  biasT += wV[ievt] * d;
1043  devT += wV[ievt] * TMath::Abs(d);
1044  rmsT += wV[ievt] * d * d;
1045  histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
1046  ic++;
1047  }
1048  }
1049  biasT /= sumw;
1050  devT /= sumw;
1051  rmsT /= sumw;
1052  rmsT = TMath::Sqrt(rmsT - biasT*biasT);
1053  mInf = gTools().GetMutualInformation( *hist );
1054  mInfT = gTools().GetMutualInformation( *histT );
1055 
1056  delete hist;
1057  delete histT;
1058 
1059  delete [] rV;
1060  delete [] tV;
1061  delete [] wV;
1062 
1063  Data()->SetCurrentType(savedType);
1064 }
1065 
1066 
1067 ////////////////////////////////////////////////////////////////////////////////
1068 /// test multiclass classification
1069 
1071 {
1073  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1074  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test data..." << Endl;
1075  for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1076  resMulticlass->GetBestMultiClassCuts(icls);
1077  }
1078 }
1079 
1080 
1081 ////////////////////////////////////////////////////////////////////////////////
1082 /// initialization
1083 
1085 {
1087 
1088  ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
1090 
1091  // sanity checks: tree must exist, and theVar must be in tree
1092  if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
1093  Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
1094  << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
1095  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
1096  << " not found in tree" << Endl;
1097  }
1098 
1099  // basic statistics operations are made in base class
1102 
1103  // choose reasonable histogram ranges, by removing outliers
1104  Double_t nrms = 10;
1105  fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
1106  fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );
1107 
1108  // determine cut orientation
1110 
1111  // fill 2 types of histograms for the various analyses
1112  // this one is for actual plotting
1113 
1114  Double_t sxmax = fXmax+0.00001;
1115 
1116  // classifier response distributions for training sample
1117  // MVA plots used for graphics representation (signal)
1118  TString TestvarName;
1119  if(IsSilentFile())
1120  {
1121  TestvarName=Form("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1122  }else
1123  {
1124  TestvarName=GetTestvarName();
1125  }
1126  TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
1127  TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
1128  mvaRes->Store(mva_s, "MVA_S");
1129  mvaRes->Store(mva_b, "MVA_B");
1130  mva_s->Sumw2();
1131  mva_b->Sumw2();
1132 
1133  TH1* proba_s = 0;
1134  TH1* proba_b = 0;
1135  TH1* rarity_s = 0;
1136  TH1* rarity_b = 0;
1137  if (HasMVAPdfs()) {
1138  // P(MVA) plots used for graphics representation
1139  proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1140  proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1141  mvaRes->Store(proba_s, "Prob_S");
1142  mvaRes->Store(proba_b, "Prob_B");
1143  proba_s->Sumw2();
1144  proba_b->Sumw2();
1145 
1146  // R(MVA) plots used for graphics representation
1147  rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1148  rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1149  mvaRes->Store(rarity_s, "Rar_S");
1150  mvaRes->Store(rarity_b, "Rar_B");
1151  rarity_s->Sumw2();
1152  rarity_b->Sumw2();
1153  }
1154 
1155  // MVA plots used for efficiency calculations (large number of bins)
1156  TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
1157  TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
1158  mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
1159  mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
1160  mva_eff_s->Sumw2();
1161  mva_eff_b->Sumw2();
1162 
1163  // fill the histograms
1164 
1165  ResultsClassification* mvaProb = dynamic_cast<ResultsClassification*>
1167 
1168  Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
1169  if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
1170  std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();
1171 
1172  //LM: this is needed to avoid crashes in ROOCCURVE
1173  if ( mvaRes->GetSize() != GetNEvents() ) {
1174  Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
1175  assert(mvaRes->GetSize() == GetNEvents());
1176  }
1177 
1178  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1179 
1180  const Event* ev = GetEvent(ievt);
1181  Float_t v = (*mvaRes)[ievt][0];
1182  Float_t w = ev->GetWeight();
1183 
1184  if (DataInfo().IsSignal(ev)) {
1185  mvaResTypes->push_back(kTRUE);
1186  mva_s ->Fill( v, w );
1187  if (mvaProb) {
1188  proba_s->Fill( (*mvaProb)[ievt][0], w );
1189  rarity_s->Fill( GetRarity( v ), w );
1190  }
1191 
1192  mva_eff_s ->Fill( v, w );
1193  }
1194  else {
1195  mvaResTypes->push_back(kFALSE);
1196  mva_b ->Fill( v, w );
1197  if (mvaProb) {
1198  proba_b->Fill( (*mvaProb)[ievt][0], w );
1199  rarity_b->Fill( GetRarity( v ), w );
1200  }
1201  mva_eff_b ->Fill( v, w );
1202  }
1203  }
1204 
1205  // uncomment those (and several others if you want unnormalized output
1206  gTools().NormHist( mva_s );
1207  gTools().NormHist( mva_b );
1208  gTools().NormHist( proba_s );
1209  gTools().NormHist( proba_b );
1210  gTools().NormHist( rarity_s );
1211  gTools().NormHist( rarity_b );
1212  gTools().NormHist( mva_eff_s );
1213  gTools().NormHist( mva_eff_b );
1214 
1215  // create PDFs from histograms, using default splines, and no additional smoothing
1216  if (fSplS) { delete fSplS; fSplS = 0; }
1217  if (fSplB) { delete fSplB; fSplB = 0; }
1218  fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
1219  fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
1220 }
1221 
1222 ////////////////////////////////////////////////////////////////////////////////
1223 /// general method used in writing the header of the weight files where
1224 /// the used variables, variable transformation type etc. is specified
1225 
1226 void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1227 {
1228  TString prefix = "";
1229  UserGroup_t * userInfo = gSystem->GetUserInfo();
1230 
1231  tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1232  tf << prefix << "Method : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1233  tf.setf(std::ios::left);
1234  tf << prefix << "TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
1235  << GetTrainingTMVAVersionCode() << "]" << std::endl;
1236  tf << prefix << "ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
1237  << GetTrainingROOTVersionCode() << "]" << std::endl;
1238  tf << prefix << "Creator : " << userInfo->fUser << std::endl;
1239  tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1240  tf << prefix << "Host : " << gSystem->GetBuildNode() << std::endl;
1241  tf << prefix << "Dir : " << gSystem->WorkingDirectory() << std::endl;
1242  tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1243 
1244  TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1245 
1246  tf << prefix << "Analysis type : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1247  tf << prefix << std::endl;
1248 
1249  delete userInfo;
1250 
1251  // First write all options
1252  tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1253  WriteOptionsToStream( tf, prefix );
1254  tf << prefix << std::endl;
1255 
1256  // Second write variable info
1257  tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1258  WriteVarsToStream( tf, prefix );
1259  tf << prefix << std::endl;
1260 }
1261 
1262 ////////////////////////////////////////////////////////////////////////////////
1263 /// xml writing
1264 
1265 void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1266 {
1267  void* it = gTools().AddChild(gi,"Info");
1268  gTools().AddAttr(it,"name", name);
1269  gTools().AddAttr(it,"value", value);
1270 }
1271 
1272 ////////////////////////////////////////////////////////////////////////////////
1273 
1275  if (analysisType == Types::kRegression) {
1276  AddRegressionOutput( type );
1277  } else if (analysisType == Types::kMulticlass) {
1278  AddMulticlassOutput( type );
1279  } else {
1280  AddClassifierOutput( type );
1281  if (HasMVAPdfs())
1282  AddClassifierOutputProb( type );
1283  }
1284 }
1285 
1286 ////////////////////////////////////////////////////////////////////////////////
1287 /// general method used in writing the header of the weight files where
1288 /// the used variables, variable transformation type etc. is specified
1289 
1290 void TMVA::MethodBase::WriteStateToXML( void* parent ) const
1291 {
1292  if (!parent) return;
1293 
1294  UserGroup_t* userInfo = gSystem->GetUserInfo();
1295 
1296  void* gi = gTools().AddChild(parent, "GeneralInfo");
1297  AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
1298  AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
1299  AddInfoItem( gi, "Creator", userInfo->fUser);
1300  TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
1301  AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
1302  AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
1303  AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
1304  AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));
1305 
1306  Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
1307  TString analysisType((aType==Types::kRegression) ? "Regression" :
1308  (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
1309  AddInfoItem( gi, "AnalysisType", analysisType );
1310  delete userInfo;
1311 
1312  // write options
1313  AddOptionsXMLTo( parent );
1314 
1315  // write variable info
1316  AddVarsXMLTo( parent );
1317 
1318  // write spectator info
1319  if (fModelPersistence)
1320  AddSpectatorsXMLTo( parent );
1321 
1322  // write class info if in multiclass mode
1323  AddClassesXMLTo(parent);
1324 
1325  // write target info if in regression mode
1326  if (DoRegression()) AddTargetsXMLTo(parent);
1327 
1328  // write transformations
1329  GetTransformationHandler(false).AddXMLTo( parent );
1330 
1331  // write MVA variable distributions
1332  void* pdfs = gTools().AddChild(parent, "MVAPdfs");
1333  if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1334  if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1335 
1336  // write weights
1337  AddWeightsXMLTo( parent );
1338 }
1339 
1340 ////////////////////////////////////////////////////////////////////////////////
1341 /// write reference MVA distributions (and other information)
1342 /// to a ROOT type weight file
1343 
1345 {
1346  Bool_t addDirStatus = TH1::AddDirectoryStatus();
1347  TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
1348  fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1349  fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1350 
1351  TH1::AddDirectory( addDirStatus );
1352 
1353  ReadWeightsFromStream( rf );
1354 
1355  SetTestvarName();
1356 }
1357 
1358 ////////////////////////////////////////////////////////////////////////////////
1359 /// write options and weights to file
1360 /// note that each one text file for the main configuration information
1361 /// and one ROOT file for ROOT objects are created
1362 
1364 {
1365  // ---- create the text file
1366  TString tfname( GetWeightFileName() );
1367 
1368  // writing xml file
1369  TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1370  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1371  << "Creating xml weight file: "
1372  << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1373  void* doc = gTools().xmlengine().NewDoc();
1374  void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1375  gTools().xmlengine().DocSetRootElement(doc,rootnode);
1376  gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1377  WriteStateToXML(rootnode);
1378  gTools().xmlengine().SaveDoc(doc,xmlfname);
1379  gTools().xmlengine().FreeDoc(doc);
1380 }
1381 
1382 ////////////////////////////////////////////////////////////////////////////////
1383 /// Function to write options and weights to file
1384 
1386 {
1387  // get the filename
1388 
1389  TString tfname(GetWeightFileName());
1390 
1391  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1392  << "Reading weight file: "
1393  << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1394 
1395  if (tfname.EndsWith(".xml") ) {
1396 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,29,0)
1397  void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1398 #else
1399  void* doc = gTools().xmlengine().ParseFile(tfname);
1400 #endif
1401  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1402  ReadStateFromXML(rootnode);
1403  gTools().xmlengine().FreeDoc(doc);
1404  }
1405  else {
1406  std::filebuf fb;
1407  fb.open(tfname.Data(),std::ios::in);
1408  if (!fb.is_open()) { // file not found --> Error
1409  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1410  << "Unable to open input weight file: " << tfname << Endl;
1411  }
1412  std::istream fin(&fb);
1413  ReadStateFromStream(fin);
1414  fb.close();
1415  }
1416  if (!fTxtWeightsOnly) {
1417  // ---- read the ROOT file
1418  TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1419  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1420  << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1421  TFile* rfile = TFile::Open( rfname, "READ" );
1422  ReadStateFromStream( *rfile );
1423  rfile->Close();
1424  }
1425 }
1426 ////////////////////////////////////////////////////////////////////////////////
1427 /// for reading from memory
1428 
1429 void TMVA::MethodBase::ReadStateFromXMLString( const char* xmlstr ) {
1430 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,26,00)
1431  void* doc = gTools().xmlengine().ParseString(xmlstr);
1432  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1433  ReadStateFromXML(rootnode);
1434  gTools().xmlengine().FreeDoc(doc);
1435 #else
1436  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Method MethodBase::ReadStateFromXMLString( const char* xmlstr = "
1437  << xmlstr << " ) is not available for ROOT versions prior to 5.26/00." << Endl;
1438 #endif
1439 
1440  return;
1441 }
1442 
1443 ////////////////////////////////////////////////////////////////////////////////
1444 
/// Restore the state of the method from the XML node \p methodNode.
///
/// The attribute "Method" is read and the part following "::" becomes the
/// method name; the logger source and the test variable name are updated
/// accordingly. Afterwards all child nodes are visited and dispatched on
/// their node name ("GeneralInfo", "Options", "Variables", "Spectators",
/// "Classes", "Targets", "Transformations", "MVAPdfs", "Weights"); any other
/// node name only triggers a warning.
///
/// \param methodNode XML node carrying the "Method" attribute and the child nodes
1445 void TMVA::MethodBase::ReadStateFromXML( void* methodNode )
1446 {
1447  TString fullMethodName;
1448  gTools().ReadAttr( methodNode, "Method", fullMethodName );
 // keep only the part behind the "::" separator
1449  fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1450 
1451  // update logger
1452  Log().SetSource( GetName() );
1453  Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1454  << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1455 
1456  // after the method name is read, the testvar can be set
1457  SetTestvarName();
1458 
1459  TString nodeName("");
1460  void* ch = gTools().GetChild(methodNode);
1461  while (ch!=0) {
1462  nodeName = TString( gTools().GetName(ch) );
1463 
1464  if (nodeName=="GeneralInfo") {
1465  // read analysis type
1466 
 // every child of "GeneralInfo" is an item identified by its "name" attribute
1467  TString name(""),val("");
1468  void* antypeNode = gTools().GetChild(ch);
1469  while (antypeNode) {
1470  gTools().ReadAttr( antypeNode, "name", name );
1471 
1472  if (name == "TrainingTime")
1473  gTools().ReadAttr( antypeNode, "value", fTrainTime );
1474 
1475  if (name == "AnalysisType") {
 // the comparison is case-insensitive; an unknown value is fatal
1476  gTools().ReadAttr( antypeNode, "value", val );
1477  val.ToLower();
1478  if (val == "regression" ) SetAnalysisType( Types::kRegression );
1479  else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1480  else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1481  else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1482  }
1483 
1484  if (name == "TMVA Release" || name == "TMVA") {
 // the integer version code is the text enclosed in "[" and "]"
1485  TString s;
1486  gTools().ReadAttr( antypeNode, "value", s);
1487  fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1488  Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1489  }
1490 
1491  if (name == "ROOT Release" || name == "ROOT") {
 // same "[code]" convention as for the TMVA release
1492  TString s;
1493  gTools().ReadAttr( antypeNode, "value", s);
1494  fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1495  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1496  << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1497  }
1498  antypeNode = gTools().GetNextChild(antypeNode);
1499  }
1500  }
1501  else if (nodeName=="Options") {
1502  ReadOptionsFromXML(ch);
1503  ParseOptions();
1504 
1505  }
1506  else if (nodeName=="Variables") {
 // NOTE(review): no statement is visible in this branch in the present listing - confirm against the upstream file
1508  }
1509  else if (nodeName=="Spectators") {
 // NOTE(review): no statement is visible in this branch in the present listing - confirm against the upstream file
1511  }
1512  else if (nodeName=="Classes") {
 // classes are only read if none are known yet
1513  if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1514  }
1515  else if (nodeName=="Targets") {
 // targets are only read for regression and only if none are known yet
1516  if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1517  }
1518  else if (nodeName=="Transformations") {
 // NOTE(review): no statement is visible in this branch in the present listing - confirm against the upstream file
1520  }
1521  else if (nodeName=="MVAPdfs") {
 // existing PDFs are discarded; new ones are only created if the node has children
1522  TString pdfname;
1523  if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1524  if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1525  void* pdfnode = gTools().GetChild(ch);
1526  if (pdfnode) {
1527  gTools().ReadAttr(pdfnode, "Name", pdfname);
1528  fMVAPdfS = new PDF(pdfname);
1529  fMVAPdfS->ReadXML(pdfnode);
 // NOTE(review): the second child is used without a null check; presumably the
 // signal and background PDF nodes are always written as a pair - confirm
1530  pdfnode = gTools().GetNextChild(pdfnode);
1531  gTools().ReadAttr(pdfnode, "Name", pdfname);
1532  fMVAPdfB = new PDF(pdfname);
1533  fMVAPdfB->ReadXML(pdfnode);
1534  }
1535  }
1536  else if (nodeName=="Weights") {
1537  ReadWeightsFromXML(ch);
1538  }
1539  else {
1540  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1541  }
1542  ch = gTools().GetNextChild(ch);
1543 
1544  }
1545 
1546  // update transformation handler
1547  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1548 }
1549 
1550 ////////////////////////////////////////////////////////////////////////////////
1551 /// read the header from the weight files of the different MVA methods
1552 
/// Restore the state of the method from a text weight file stream.
///
/// The stream is scanned for the line starting with "Method" to obtain the
/// method type and name, then for the sections "#OPT" (options), "#VAR"
/// (variables), optionally "#MAT" (variable transformations) and "#MVAPDFS"
/// (MVA PDFs), and finally "#WGT" (weights).
///
/// NOTE(review): several statements of this function are not visible in the
/// present listing (see the notes below) - confirm against the upstream file.
///
/// \param fin input stream positioned at the beginning of the weight file
1553 void TMVA::MethodBase::ReadStateFromStream( std::istream& fin )
1554 {
1555  char buf[512];
1556 
1557  // when reading from stream, we assume the files are produced with TMVA<=397
 // NOTE(review): the statement belonging to the comment above is not visible in this listing
1559 
1560 
1561  // first read the method name
1562  GetLine(fin,buf);
1563  while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1564  TString namestr(buf);
1565 
 // the method type is the last blank-separated word in front of "::"
1566  TString methodType = namestr(0,namestr.Index("::"));
1567  methodType = methodType(methodType.Last(' '),methodType.Length());
1568  methodType = methodType.Strip(TString::kLeading);
1569 
 // the method name is the text behind "::"; it defaults to the method type if empty
1570  TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1571  methodName = methodName.Strip(TString::kLeading);
1572  if (methodName == "") methodName = methodType;
1573  fMethodName = methodName;
1574 
1575  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1576 
1577  // update logger
1578  Log().SetSource( GetName() );
1579 
1580  // now the question is whether to read the variables first or the options (well, of course the order
1581  // of writing them needs to agree)
1582  //
1583  // the option "Decorrelation" is needed to decide if the variables we
1584  // read are decorrelated or not
1585  //
1586  // the variables are needed by some methods (TMLP) to build the NN
1587  // which is done in ProcessOptions so for the time being we first Read and Parse the options then
1588  // we read the variables, and then we process the options
1589 
1590  // now read all options
1591  GetLine(fin,buf);
1592  while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1593  ReadOptionsFromStream(fin);
1594  ParseOptions();
1595 
1596  // Now read variable info
1597  fin.getline(buf,512);
1598  while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1599  ReadVarsFromStream(fin);
1600 
1601  // now we process the options (of the derived class)
1602  ProcessOptions();
1603 
1604  if (IsNormalised()) {
 // NOTE(review): the declaration of "norm" is not visible in this listing
1607  norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1608  }
 // select the variable transformation(s) named by fVarTransformString; an unknown name is fatal
 // NOTE(review): apart from the "Uniform" case the statements of the branches are not visible in this listing
1609  VariableTransformBase *varTrafo(0), *varTrafo2(0);
1610  if ( fVarTransformString == "None") {
1611  if (fUseDecorr)
1613  } else if ( fVarTransformString == "Decorrelate" ) {
1615  } else if ( fVarTransformString == "PCA" ) {
1617  } else if ( fVarTransformString == "Uniform" ) {
1618  varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1619  } else if ( fVarTransformString == "Gauss" ) {
1621  } else if ( fVarTransformString == "GaussDecorr" ) {
1624  } else {
1625  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1626  << fVarTransformString << "' unknown." << Endl;
1627  }
1628  // Now read decorrelation matrix if available
1629  if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1630  fin.getline(buf,512);
1631  while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1632  if (varTrafo) {
 // NOTE(review): the declaration of "trafo" is not visible in this listing
1634  varTrafo->ReadTransformationFromStream(fin, trafo );
1635  }
1636  if (varTrafo2) {
 // NOTE(review): the declaration of "trafo" is not visible in this listing
1638  varTrafo2->ReadTransformationFromStream(fin, trafo );
1639  }
1640  }
1641 
1642 
1643  if (HasMVAPdfs()) {
1644  // Now read the MVA PDFs
1645  fin.getline(buf,512);
1646  while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
 // existing PDFs are replaced by freshly created ones before streaming them in
1647  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1648  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1649  fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1650  fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1653 
1654  fin >> *fMVAPdfS;
1655  fin >> *fMVAPdfB;
1656  }
1657 
1658  // Now read weights
1659  fin.getline(buf,512);
1660  while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
 // skip one further line before the weights are read
1661  fin.getline(buf,512);
1662  ReadWeightsFromStream( fin );;
1663 
1664  // update transformation handler
1665  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1666 
1667 }
1668 
1669 ////////////////////////////////////////////////////////////////////////////////
1670 /// write the list of variables (name, min, max) for a given data
1671 /// transformation method to the stream
1672 
1673 void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1674 {
1675  o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1676  std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1677  for (; varIt!=DataInfo().GetVariableInfos().end(); varIt++) { o << prefix; varIt->WriteToStream(o); }
1678  o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1679  varIt = DataInfo().GetSpectatorInfos().begin();
1680  for (; varIt!=DataInfo().GetSpectatorInfos().end(); varIt++) { o << prefix; varIt->WriteToStream(o); }
1681 }
1682 
1683 ////////////////////////////////////////////////////////////////////////////////
1684 /// Read the variables (name, min, max) for a given data
1685 /// transformation method from the stream. In the stream we only
1686 /// expect the limits which will be set
1687 
1688 void TMVA::MethodBase::ReadVarsFromStream( std::istream& istr )
1689 {
1690  TString dummy;
1691  UInt_t readNVar;
1692  istr >> dummy >> readNVar;
1693 
1694  if (readNVar!=DataInfo().GetNVariables()) {
1695  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1696  << " while there are " << readNVar << " variables declared in the file"
1697  << Endl;
1698  }
1699 
1700  // we want to make sure all variables are read in the order they are defined
1701  VariableInfo varInfo;
1702  std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1703  int varIdx = 0;
1704  for (; varIt!=DataInfo().GetVariableInfos().end(); varIt++, varIdx++) {
1705  varInfo.ReadFromStream(istr);
1706  if (varIt->GetExpression() == varInfo.GetExpression()) {
1707  varInfo.SetExternalLink((*varIt).GetExternalLink());
1708  (*varIt) = varInfo;
1709  }
1710  else {
1711  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1712  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1713  Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1714  Log() << kINFO << "the correct working of the method):" << Endl;
1715  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1716  Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1717  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1718  }
1719  }
1720 }
1721 
1722 ////////////////////////////////////////////////////////////////////////////////
1723 /// write variable info to XML
1724 
1725 void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1726 {
1727  void* vars = gTools().AddChild(parent, "Variables");
1728  gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1729 
1730  for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1731  VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1732  void* var = gTools().AddChild( vars, "Variable" );
1733  gTools().AddAttr( var, "VarIndex", idx );
1734  vi.AddToXML( var );
1735  }
1736 }
1737 
1738 ////////////////////////////////////////////////////////////////////////////////
1739 /// write spectator info to XML
1740 
1741 void TMVA::MethodBase::AddSpectatorsXMLTo( void* parent ) const
1742 {
1743  void* specs = gTools().AddChild(parent, "Spectators");
1744 
1745  UInt_t writeIdx=0;
1746  for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1747 
1748  VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1749 
1750  // we do not want to write spectators that are category-cuts,
1751  // except if the method is the category method and the spectators belong to it
1752  if (vi.GetVarType()=='C') continue;
1753 
1754  void* spec = gTools().AddChild( specs, "Spectator" );
1755  gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1756  vi.AddToXML( spec );
1757  }
1758  gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1759 }
1760 
1761 ////////////////////////////////////////////////////////////////////////////////
1762 /// write class info to XML
1763 
1764 void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1765 {
1766  UInt_t nClasses=DataInfo().GetNClasses();
1767 
1768  void* classes = gTools().AddChild(parent, "Classes");
1769  gTools().AddAttr( classes, "NClass", nClasses );
1770 
1771  for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1772  ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1773  TString className =classInfo->GetName();
1774  UInt_t classNumber=classInfo->GetNumber();
1775 
1776  void* classNode=gTools().AddChild(classes, "Class");
1777  gTools().AddAttr( classNode, "Name", className );
1778  gTools().AddAttr( classNode, "Index", classNumber );
1779  }
1780 }
1781 ////////////////////////////////////////////////////////////////////////////////
1782 /// write target info to XML
1783 
1784 void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1785 {
1786  void* targets = gTools().AddChild(parent, "Targets");
1787  gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1788 
1789  for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1790  VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1791  void* tar = gTools().AddChild( targets, "Target" );
1792  gTools().AddAttr( tar, "TargetIndex", idx );
1793  vi.AddToXML( tar );
1794  }
1795 }
1796 
1797 ////////////////////////////////////////////////////////////////////////////////
1798 /// read variable info from XML
///
/// Reads the "NVar" attribute of `varnode` and reports kFATAL if it differs from
/// DataInfo().GetNVariables(). Then walks over the children of `varnode`: for
/// each child the "VarIndex" attribute selects the already declared variable,
/// the child is parsed into a VariableInfo, and the two expressions are
/// compared. A mismatch is logged in detail and reported as kFATAL.
1799 
1801 {
1802  UInt_t readNVar;
1803  gTools().ReadAttr( varnode, "NVar", readNVar);
1804 
1805  if (readNVar!=DataInfo().GetNVariables()) {
1806  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1807  << " while there are " << readNVar << " variables declared in the file"
1808  << Endl;
1809  }
1810 
1811  // we want to make sure all variables are read in the order they are defined
1812  VariableInfo readVarInfo, existingVarInfo;
1813  int varIdx = 0;
1814  void* ch = gTools().GetChild(varnode);
1815  while (ch) {
1816  gTools().ReadAttr( ch, "VarIndex", varIdx);
      // NOTE(review): varIdx comes from the file and is used as an index without
      // a range check - confirm that the input is trusted or validated elsewhere.
1817  existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1818  readVarInfo.ReadFromXML(ch);
1819 
1820  if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
      // keep the external link of the declared variable on the info read from file
1821  readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
      // NOTE(review): existingVarInfo is a by-value local (a copy of the entry in
      // DataInfo()), so this assignment only changes the local copy - confirm
      // whether the entry held by DataInfo() was meant to be updated here.
1822  existingVarInfo = readVarInfo;
1823  }
1824  else {
1825  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1826  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1827  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1828  Log() << kINFO << "correct working of the method):" << Endl;
1829  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1830  Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1831  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1832  }
1833  ch = gTools().GetNextChild(ch);
1834  }
1835 }
1836 
1837 ////////////////////////////////////////////////////////////////////////////////
1838 /// read spectator info from XML
///
/// Reads the "NSpec" attribute of `specnode` and reports kFATAL if it differs
/// from DataInfo().GetNSpectators(kFALSE). Then walks over the children of
/// `specnode`: for each child the "SpecIndex" attribute selects the already
/// declared spectator, the child is parsed into a VariableInfo, and the two
/// expressions are compared. A mismatch is logged in detail and reported as
/// kFATAL.
1839 
1841 {
1842  UInt_t readNSpec;
1843  gTools().ReadAttr( specnode, "NSpec", readNSpec);
1844 
1845  if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1846  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1847  << " while there are " << readNSpec << " spectators declared in the file"
1848  << Endl;
1849  }
1850 
1851  // we want to make sure all spectators are read in the order they are defined
1852  VariableInfo readSpecInfo, existingSpecInfo;
1853  int specIdx = 0;
1854  void* ch = gTools().GetChild(specnode);
1855  while (ch) {
1856  gTools().ReadAttr( ch, "SpecIndex", specIdx);
      // NOTE(review): specIdx comes from the file and is used as an index without
      // a range check - confirm that the input is trusted or validated elsewhere.
1857  existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1858  readSpecInfo.ReadFromXML(ch);
1859 
1860  if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
      // keep the external link of the declared spectator on the info read from file
1861  readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
      // NOTE(review): existingSpecInfo is a by-value local (a copy of the entry in
      // DataInfo()), so this assignment only changes the local copy - confirm
      // whether the entry held by DataInfo() was meant to be updated here.
1862  existingSpecInfo = readSpecInfo;
1863  }
1864  else {
1865  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1866  Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1867  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1868  Log() << kINFO << "correct working of the method):" << Endl;
1869  Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1870  Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1871  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1872  }
1873  ch = gTools().GetNextChild(ch);
1874  }
1875 }
1876 
1877 ////////////////////////////////////////////////////////////////////////////////
1878 /// read number of classes from XML
///
/// Reads the "NClass" attribute of `clsnode` and registers the classes with
/// DataInfo(): if the node has no children, classes named "class0", "class1",
/// ... are added (one per counted class); otherwise one class is added per
/// child, using the child's "Name" attribute. Afterwards fSignalClass and
/// fBackgroundClass are set to the numbers of the classes called "Signal" and
/// "Background", falling back to 0 and 1 respectively when no such class exists.
1879 
1881 {
1882  UInt_t readNCls;
1883  // coverity[tainted_data_argument]
1884  gTools().ReadAttr( clsnode, "NClass", readNCls);
1885 
1886  TString className="";
1887  UInt_t classIndex=0;
1888  void* ch = gTools().GetChild(clsnode);
1889  if (!ch) {
      // no per-class nodes present: generate default class names from the count
1890  for (UInt_t icls = 0; icls<readNCls;++icls) {
1891  TString classname = Form("class%i",icls);
1892  DataInfo().AddClass(classname);
1893 
1894  }
1895  }
1896  else{
1897  while (ch) {
      // "Index" is read but not used further in this function; only the name is registered
1898  gTools().ReadAttr( ch, "Index", classIndex);
1899  gTools().ReadAttr( ch, "Name", className );
1900  DataInfo().AddClass(className);
1901 
1902  ch = gTools().GetNextChild(ch);
1903  }
1904  }
1905 
1906  // retrieve signal and background class index
1907  if (DataInfo().GetClassInfo("Signal") != 0) {
1908  fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1909  }
1910  else
1911  fSignalClass=0;
1912  if (DataInfo().GetClassInfo("Background") != 0) {
1913  fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
1914  }
1915  else
1916  fBackgroundClass=1;
1917 }
1918 
1919 ////////////////////////////////////////////////////////////////////////////////
1920 /// read target info from XML
///
/// For every child of `tarnode` the "Expression" attribute is read and a target
/// with that expression is added to DataInfo() (empty title and unit, range 0..0
/// as passed to AddTarget below).
1921 
1923 {
      // "NTrgt" is read from the node but not used further in this function
1924  UInt_t readNTar;
1925  gTools().ReadAttr( tarnode, "NTrgt", readNTar);
1926 
1927  int tarIdx = 0;
1928  TString expression;
1929  void* ch = gTools().GetChild(tarnode);
1930  while (ch) {
      // "TargetIndex" is read but not used further; targets are added in document order
1931  gTools().ReadAttr( ch, "TargetIndex", tarIdx);
1932  gTools().ReadAttr( ch, "Expression", expression);
1933  DataInfo().AddTarget(expression,"","",0,0);
1934 
1935  ch = gTools().GetNextChild(ch);
1936  }
1937 }
1938 
1939 ////////////////////////////////////////////////////////////////////////////////
1940 /// returns the ROOT directory where info/histograms etc of the
1941 /// corresponding MVA method instance are stored
///
/// If fBaseDir is already set it is returned directly. Otherwise the directory
/// named GetMethodName() is looked up below MethodBaseDir(); if it does not
/// exist it is created, made the current directory, and the working directory
/// and the weight file name are written into it under the keys "TrainingPath"
/// and "WeightFileName".
1942 
1944 {
1945  if (fBaseDir != 0) return fBaseDir;
1946  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
1947 
1948  TDirectory* methodDir = MethodBaseDir();
1949  if (methodDir==0)
1950  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" << Endl;
1951 
1952  TString defaultDir = GetMethodName();
1953  TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
1954  if(!sdir)
1955  {
1956  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet--> created it" <<Endl;
1957  sdir = methodDir->mkdir(defaultDir);
1958  sdir->cd();
1959  // write weight file name into target file
1960  TObjString wfilePath( gSystem->WorkingDirectory() );
1961  TObjString wfileName( GetWeightFileName() );
1962  wfilePath.Write( "TrainingPath" );
1963  wfileName.Write( "WeightFileName" );
1964  }
1965 
      // NOTE(review): this message is also logged when the directory was just created above
1966  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
1967  return sdir;
1968 }
1969 
1970 ////////////////////////////////////////////////////////////////////////////////
1971 /// returns the ROOT directory where all instances of the
1972 /// corresponding MVA method are stored
///
/// If fMethodBaseDir is already set it is returned directly. Otherwise the
/// dataset directory (named DataInfo().GetName()) is looked up in the directory
/// returned by GetFile() and created if missing; the method directory name is
/// "Method_" followed by GetMethodName().
1973 
1975  {
1976  if (fMethodBaseDir != 0) return fMethodBaseDir;
1977 
1978  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " not set yet --> check if already there.." <<Endl;
1979 
1980 
1981  TDirectory *fFactoryBaseDir=GetFile();
1982 
1983  fMethodBaseDir = fFactoryBaseDir->GetDirectory(DataInfo().GetName());
1984  if(!fMethodBaseDir) //creating dataset directory
1985  {
1986  fMethodBaseDir = fFactoryBaseDir->mkdir(DataInfo().GetName(),Form("Base directory for dataset %s",DataInfo().GetName()));
      // NOTE(review): unlike the other messages in this file, this kFATAL message is
      // not terminated with Endl - confirm that it is actually emitted.
1987  if(!fMethodBaseDir)Log()<<kFATAL<<"Can not create dir "<<DataInfo().GetName();
1988  }
1989  TString _methodDir = Form("Method_%s",GetMethodName().Data());
1991 
      // NOTE(review): as the code reads here, fMethodBaseDir still refers to the dataset
      // directory ensured above, so this branch would only be taken if that failed;
      // presumably a lookup of _methodDir belongs before this test - verify against
      // the complete source.
1992  if(!fMethodBaseDir){
1993  fMethodBaseDir = fFactoryBaseDir->GetDirectory(DataInfo().GetName())->mkdir(_methodDir.Data(),Form("Directory for all %s methods", GetMethodTypeName().Data()));
1994  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " does not exist yet--> created it" <<Endl;
1995  }
1996 
1997  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<"Return from MethodBaseDir() after creating base directory "<<Endl;
1998  return fMethodBaseDir;
1999 }
2000 
2001 ////////////////////////////////////////////////////////////////////////////////
2002 /// set directory of weight file
///
/// Stores `fileDir` in fFileDir.
2003 
2005 {
2006  fFileDir = fileDir;
2008 }
2009 
2010 ////////////////////////////////////////////////////////////////////////////////
2011 /// set the weight file name (deprecated)
///
/// Stores `theWeightFile` in fWeightFile.
2012 
2014 {
2015  fWeightFile = theWeightFile;
2016 }
2017 
2018 ////////////////////////////////////////////////////////////////////////////////
2019 /// retrieve weight file name
///
/// Returns fWeightFile if it is non-empty. Otherwise the default name is built
/// from the weight file directory, the job name, the method name and the
/// configured weight file extension, with ".xml" appended.
2020 
2022 {
2023  if (fWeightFile!="") return fWeightFile;
2024 
2025  // the default consists of
2026  // directory/jobname_methodname<suffix>.<extension>.xml
2027  TString suffix = "";
2028  TString wFileDir(GetWeightFileDir());
      // a '/' separator is inserted only if the directory does not already end with one
      // NOTE(review): the last character is read without checking for an empty
      // directory string - confirm GetWeightFileDir() can never return "".
2029  return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2030  + GetJobName() + "_" + GetMethodName() +
2031  suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml" );
2032 }
2033 
2034 ////////////////////////////////////////////////////////////////////////////////
2035 /// writes all MVA evaluation histograms to file
///
/// Changes into BaseDir(), writes the histograms of the signal and background
/// MVA PDFs if those PDFs exist, and then writes the stored objects of the
/// results booked for this method and `treetype` (kFATAL if there are none).
2036 
2038 {
2039  BaseDir()->cd();
2040 
2041 
2042  // write MVA PDFs to file - if exist
2043  if (0 != fMVAPdfS) {
2046  fMVAPdfS->GetPDFHist()->Write();
2047  }
2048  if (0 != fMVAPdfB) {
2051  fMVAPdfB->GetPDFHist()->Write();
2052  }
2053 
2054  // write result-histograms
2055  Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2056  if (!results)
2057  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2058  << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2059  << "/kMaxAnalysisType" << Endl;
2060  results->GetStorage()->Write();
      // NOTE(review): nothing is done inside this test-sample branch as the code reads here
2061  if (treetype==Types::kTesting) {
2063  }
2064 }
2065 
2066 ////////////////////////////////////////////////////////////////////////////////
2067 /// write special monitoring histograms to file
2068 /// dummy implementation here -----------------
///
/// The body is intentionally empty.
2069 
2071 {
2072 }
2073 
2074 ////////////////////////////////////////////////////////////////////////////////
2075 /// reads one line from the input stream
2076 /// checks for certain keywords and interprets
2077 /// the line if keywords are found
2078 
2079 Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2080 {
2081  fin.getline(buf,512);
2082  TString line(buf);
2083  if (line.BeginsWith("TMVA Release")) {
2084  Ssiz_t start = line.First('[')+1;
2085  Ssiz_t length = line.Index("]",start)-start;
2086  TString code = line(start,length);
2087  std::stringstream s(code.Data());
2089  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2090  }
2091  if (line.BeginsWith("ROOT Release")) {
2092  Ssiz_t start = line.First('[')+1;
2093  Ssiz_t length = line.Index("]",start)-start;
2094  TString code = line(start,length);
2095  std::stringstream s(code.Data());
2097  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2098  }
2099  if (line.BeginsWith("Analysis type")) {
2100  Ssiz_t start = line.First('[')+1;
2101  Ssiz_t length = line.Index("]",start)-start;
2102  TString code = line(start,length);
2103  std::stringstream s(code.Data());
2104  std::string analysisType;
2105  s >> analysisType;
2106  if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2107  else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2108  else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2109  else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2110 
2111  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2112  << (GetAnalysisType() == Types::kRegression ? "Regression" :
2113  (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2114  }
2115 
2116  return true;
2117 }
2118 
2119 ////////////////////////////////////////////////////////////////////////////////
2120 /// Create PDFs of the MVA output variables
///
/// Fills one signal and one background histogram with the classifier values in
/// `mvaRes`, weighted with the event weights, normalises them, and uses them to
/// build and validate fMVAPdfS and fMVAPdfB. The temporary histograms are
/// written to file unless IsSilentFile() is true and are deleted at the end.
2121 
2123 {
2125 
2126  // the PDF's are stored as results ONLY if the corresponding "results" are booked,
2127  // otherwise they will be only used 'online'
2128  ResultsClassification * mvaRes = dynamic_cast<ResultsClassification*>
2130 
2131  if (mvaRes==0 || mvaRes->GetSize()==0) {
2132  Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2133  }
2134 
      // NOTE(review): mvaRes is dereferenced below even when the check above found
      // it null or empty - confirm whether kERROR stops execution here.
2135  Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2136  Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2137 
2138  // create histograms that serve as basis to create the MVA Pdfs
2139  TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2140  fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2141  TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2142  fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2143 
2144 
2145  // compute sum of weights properly
2146  histMVAPdfS->Sumw2();
2147  histMVAPdfB->Sumw2();
2148 
2149  // fill histograms
2150  for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2151  Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2152  Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2153 
2154  if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2155  else histMVAPdfB->Fill( theVal, theWeight );
2156  }
2157 
2158  gTools().NormHist( histMVAPdfS );
2159  gTools().NormHist( histMVAPdfB );
2160 
2161  // momentary hack for ROOT problem
2162  if(!IsSilentFile())
2163  {
2164  histMVAPdfS->Write();
2165  histMVAPdfB->Write();
2166  }
2167  // create PDFs
2168  fMVAPdfS->BuildPDF ( histMVAPdfS );
2169  fMVAPdfB->BuildPDF ( histMVAPdfB );
2170  fMVAPdfS->ValidatePDF( histMVAPdfS );
2171  fMVAPdfB->ValidatePDF( histMVAPdfB );
2172 
      // the separation is only reported for exactly two classes
2173  if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2174  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2175  << Form( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2176  GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2177  << Endl;
2178  }
2179 
2180  delete histMVAPdfS;
2181  delete histMVAPdfB;
2182 }
2183 
2185  // the simple one, automatically calculates the mvaVal and uses the
2186  // SAME sig/bkg ratio as given in the training sample (typically 50/50
2187  // .. (NormMode=EqualNumEvents) but can be different)
      // If either MVA PDF is missing, both are created on demand via CreateMVAPdfs().
2188  if (!fMVAPdfS || !fMVAPdfB) {
2189  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
2190  CreateMVAPdfs();
2191  }
2193  Double_t mvaVal = GetMvaValue(ev);
2194 
      // delegate to the overload taking the MVA value and the signal fraction
2195  return GetProba(mvaVal,sigFraction);
2196 
2197 }
2198 ////////////////////////////////////////////////////////////////////////////////
2199 /// compute likelihood ratio
///
/// With p_s and p_b the values of the signal and background MVA PDFs at
/// `mvaVal`, the result is
/// \f[
/// \frac{p_s \cdot ap\_sig}{p_s \cdot ap\_sig + p_b \cdot (1 - ap\_sig)}
/// \f]
/// Returns -1 if either PDF does not exist or if the denominator is not positive.
2200 
2202 {
2203  if (!fMVAPdfS || !fMVAPdfB) {
2204  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2205  return -1.0;
2206  }
2207  Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2208  Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2209 
2210  Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2211 
2212  return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2213 }
2214 
2215 ////////////////////////////////////////////////////////////////////////////////
2216 /// compute rarity:
2217 /// \f[
2218 /// R(x) = \int_{[-\infty..x]} { PDF(x') dx' }
2219 /// \f]
2220 /// where PDF(x) is the PDF of the classifier's signal or background distribution
///
/// The PDF is selected by `reftype` (signal PDF for Types::kSignal, background
/// PDF otherwise) and is integrated from its lower edge GetXmin() up to
/// `mvaVal`. Returns 0 with a warning if the required PDF does not exist.
2221 
2223 {
2224  if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2225  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2226  << "select option \"CreateMVAPdfs\"" << Endl;
2227  return 0.0;
2228  }
2229 
2230  PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2231 
2232  return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2233 }
2234 
2235 ////////////////////////////////////////////////////////////////////////////////
2236 /// fill background efficiency (resp. rejection) versus signal efficiency plots
2237 /// returns signal efficiency at background efficiency indicated in theString
///
/// `theString` is parsed with gTools().ParseFormatLine(). With fewer than two
/// tokens the area under the background-rejection versus signal-efficiency
/// curve is returned instead; more than two tokens is reported as kFATAL.
/// When no "MVA_EFF_S" histogram is stored in the results yet, the efficiency
/// histograms and the spline fSpleffBvsS are built and the signal reference cut
/// is updated via SetSignalReferenceCut(). In the non-area case `effSerr` is set
/// to sqrt(effS*(1-effS)/nevtS) if nevtS > 0 and to 0 otherwise.
2238 
2240 {
2241  Data()->SetCurrentType(type);
2242  Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
      // NOTE(review): the result of the dynamic_cast is dereferenced without a null check
2243  std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2244 
2245  // parse input string for required background efficiency
2246  TList* list = gTools().ParseFormatLine( theString );
2247 
2248  // sanity check
2249  Bool_t computeArea = kFALSE;
2250  if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2251  else if (list->GetSize() > 2) {
2252  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2253  << " in string: " << theString
2254  << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2255  delete list;
2256  return -1;
2257  }
2258 
2259  // sanity check
2260  if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2261  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2262  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2263  delete list;
2264  return -1.0;
2265  }
2266 
2267  // create histograms
2268 
2269  // first, get efficiency histograms for signal and background
2270  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2271  Double_t xmin = effhist->GetXaxis()->GetXmin();
2272  Double_t xmax = effhist->GetXaxis()->GetXmax();
2273 
      // NOTE(review): nevtS is only assigned inside the "first round" branch below but
      // is read at the end of the function on every call - presumably TTHREAD_TLS
      // keeps the value between calls; confirm.
2274  TTHREAD_TLS(Double_t) nevtS;
2275 
2276  // first round ? --> create histograms
2277  if (results->DoesExist("MVA_EFF_S")==0) {
2278 
2279  // for efficiency plot
2280  TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2281  TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2282  results->Store(eff_s, "MVA_EFF_S");
2283  results->Store(eff_b, "MVA_EFF_B");
2284 
2285  // sign of cut
2286  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2287 
2288  // this method is unbinned
2289  nevtS = 0;
2290  for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2291 
2292  // read the tree
2293  Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2294  Float_t theWeight = GetEvent(ievt)->GetWeight();
2295  Float_t theVal = (*mvaRes)[ievt];
2296 
2297  // select histogram depending on if sig or bgd
2298  TH1* theHist = isSignal ? eff_s : eff_b;
2299 
2300  // sum up the weights of the signal events
2301  if (isSignal) nevtS+=theWeight;
2302 
      // bin (1-based) that contains theVal on a uniform axis with fNbinsH bins
2303  TAxis* axis = theHist->GetXaxis();
2304  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2305  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2306  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2307  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2308  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2309 
      // add the event weight to every bin on the accepted side of its value
2310  if (sign > 0)
2311  for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2312  else if (sign < 0)
2313  for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2314  else
2315  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2316  }
2317 
2318  // renormalise maximum to <=1
2319  // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2320  // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2321 
2324 
2325  // background efficiency versus signal efficiency
2326  TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2327  results->Store(eff_BvsS, "MVA_EFF_BvsS");
2328  eff_BvsS->SetXTitle( "Signal eff" );
2329  eff_BvsS->SetYTitle( "Backgr eff" );
2330 
2331  // background rejection (=1-eff.) versus signal efficiency
2332  TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2333  results->Store(rej_BvsS);
2334  rej_BvsS->SetXTitle( "Signal eff" );
2335  rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2336 
2337  // inverse background eff (1/eff.) versus signal efficiency
2338  TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2339  GetTestvarName(), fNbins, 0, 1 );
2340  results->Store(inveff_BvsS);
2341  inveff_BvsS->SetXTitle( "Signal eff" );
2342  inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2343 
2344  // use root finder
2345  // spline background efficiency plot
2346  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2347  if (Use_Splines_for_Eff_) {
2348  fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2349  fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2350 
2351  // verify spline sanity
2352  gTools().CheckSplines( eff_s, fSplRefS );
2353  gTools().CheckSplines( eff_b, fSplRefB );
2354  }
2355 
2356  // make the background-vs-signal efficiency plot
2357 
2358  // create root finder
2359  RootFinder rootFinder( this, fXmin, fXmax );
2360 
2361  Double_t effB = 0;
2362  fEffS = eff_s; // to be set for the root finder
2363  for (Int_t bini=1; bini<=fNbins; bini++) {
2364 
2365  // find cut value corresponding to a given signal efficiency
2366  Double_t effS = eff_BvsS->GetBinCenter( bini );
2367  Double_t cut = rootFinder.Root( effS );
2368 
2369  // retrieve background efficiency for given cut
2370  if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2371  else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2372 
2373  // and fill histograms
2374  eff_BvsS->SetBinContent( bini, effB );
2375  rej_BvsS->SetBinContent( bini, 1.0-effB );
2377  inveff_BvsS->SetBinContent( bini, 1.0/effB );
2378  }
2379 
2380  // create splines for histogram
2381  fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2382 
2383  // search for overlap point where, when cutting on it,
2384  // one would obtain: eff_S = rej_B = 1 - eff_B
2385  Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2386  Int_t nbins_ = 5000;
2387  for (Int_t bini=1; bini<=nbins_; bini++) {
2388 
2389  // get corresponding signal and background efficiencies
2390  effS = (bini - 0.5)/Float_t(nbins_);
2391  rejB = 1.0 - fSpleffBvsS->Eval( effS );
2392 
2393  // stop at the first sign change of (effS - rejB) between consecutive scan points
2394  if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2395  effS_ = effS;
2396  rejB_ = rejB;
2397  }
2398 
2399  // find cut that corresponds to signal efficiency and update signal-like criterion
2400  Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2401  SetSignalReferenceCut( cut );
2402  fEffS = 0;
2403  }
2404 
2405  // must exist...
2406  if (0 == fSpleffBvsS) {
2407  delete list;
2408  return 0.0;
2409  }
2410 
2411  // now find signal efficiency that corresponds to required background efficiency
2412  Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2413  Int_t nbins_ = 1000;
2414 
2415  if (computeArea) {
2416 
2417  // compute area of rej-vs-eff plot
2418  Double_t integral = 0;
2419  for (Int_t bini=1; bini<=nbins_; bini++) {
2420 
2421  // get corresponding signal and background efficiencies
2422  effS = (bini - 0.5)/Float_t(nbins_);
2423  effB = fSpleffBvsS->Eval( effS );
2424  integral += (1.0 - effB);
2425  }
2426  integral /= nbins_;
2427 
2428  delete list;
2429  return integral;
2430  }
2431  else {
2432 
2433  // that will be the value of the efficiency returned (does not affect
2434  // the efficiency-vs-bkg plot which is done anyway).
2435  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2436 
2437  // find precise efficiency value
2438  for (Int_t bini=1; bini<=nbins_; bini++) {
2439 
2440  // get corresponding signal and background efficiencies
2441  effS = (bini - 0.5)/Float_t(nbins_);
2442  effB = fSpleffBvsS->Eval( effS );
2443 
2444  // find signal efficiency that corresponds to required background efficiency
2445  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2446  effS_ = effS;
2447  effB_ = effB;
2448  }
2449 
2450  // take mean between bin above and bin below
2451  effS = 0.5*(effS + effS_);
2452 
2453  effSerr = 0;
2454  if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2455 
2456  delete list;
2457  return effS;
2458  }
2459 
2460  return -1;
2461 }
2462 
2463 ////////////////////////////////////////////////////////////////////////////////
2464 
2466 {
2468 
2470 
2471  // fill background efficiency (resp. rejection) versus signal efficiency plots
2472  // returns signal efficiency at background efficiency indicated in theString
2473 
2474  // parse input string for required background efficiency
2475  TList* list = gTools().ParseFormatLine( theString );
2476  // sanity check
2477 
2478  if (list->GetSize() != 2) {
2479  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2480  << " in string: " << theString
2481  << " | required format, e.g., Efficiency:0.05" << Endl;
2482  delete list;
2483  return -1;
2484  }
2485  // that will be the value of the efficiency retured (does not affect
2486  // the efficiency-vs-bkg plot which is done anyway.
2487  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2488 
2489  delete list;
2490 
2491  // sanity check
2492  if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2493  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2494  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2495  << Endl;
2496  return -1.0;
2497  }
2498 
2499  // create histogram
2500 
2501  // first, get efficiency histograms for signal and background
2502  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2503  Double_t xmin = effhist->GetXaxis()->GetXmin();
2504  Double_t xmax = effhist->GetXaxis()->GetXmax();
2505 
2506  // first round ? --> create and fill histograms
2507  if (results->DoesExist("MVA_TRAIN_S")==0) {
2508 
2509  // classifier response distributions for test sample
2510  Double_t sxmax = fXmax+0.00001;
2511 
2512  // MVA plots on the training sample (check for overtraining)
2513  TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2514  TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2515  results->Store(mva_s_tr, "MVA_TRAIN_S");
2516  results->Store(mva_b_tr, "MVA_TRAIN_B");
2517  mva_s_tr->Sumw2();
2518  mva_b_tr->Sumw2();
2519 
2520  // Training efficiency plots
2521  TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2522  fNbinsH, xmin, xmax );
2523  TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2524  fNbinsH, xmin, xmax );
2525  results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2526  results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2527 
2528  // sign if cut
2529  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2530 
2531  std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2532  assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2533 
2534  // this method is unbinned
2535  for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2536 
2537  Data()->SetCurrentEvent(ievt);
2538  const Event* ev = GetEvent();
2539 
2540  Double_t theVal = mvaValues[ievt];
2541  Double_t theWeight = ev->GetWeight();
2542 
2543  TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2544  TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2545 
2546  theClsHist->Fill( theVal, theWeight );
2547 
2548  TAxis* axis = theEffHist->GetXaxis();
2549  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2550  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2551  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2552  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2553  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2554 
2555  if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2556  else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2557  }
2558 
2559  // normalise output distributions
2560  // uncomment those (and several others if you want unnormalized output
2561  gTools().NormHist( mva_s_tr );
2562  gTools().NormHist( mva_b_tr );
2563 
2564  // renormalise to maximum
2565  mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2566  mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2567 
2568  // Training background efficiency versus signal efficiency
2569  TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2570  // Training background rejection (=1-eff.) versus signal efficiency
2571  TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2572  results->Store(eff_bvss, "EFF_BVSS_TR");
2573  results->Store(rej_bvss, "REJ_BVSS_TR");
2574 
2575  // use root finder
2576  // spline background efficiency plot
2577  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2578  if (Use_Splines_for_Eff_) {
2579  if (fSplTrainRefS) delete fSplTrainRefS;
2580  if (fSplTrainRefB) delete fSplTrainRefB;
2581  fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2582  fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2583 
2584  // verify spline sanity
2585  gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2586  gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2587  }
2588 
2589  // make the background-vs-signal efficiency plot
2590 
2591  // create root finder
2592  RootFinder rootFinder(this, fXmin, fXmax );
2593 
2594  Double_t effB = 0;
2595  fEffS = results->GetHist("MVA_TRAINEFF_S");
2596  for (Int_t bini=1; bini<=fNbins; bini++) {
2597 
2598  // find cut value corresponding to a given signal efficiency
2599  Double_t effS = eff_bvss->GetBinCenter( bini );
2600 
2601  Double_t cut = rootFinder.Root( effS );
2602 
2603  // retrieve background efficiency for given cut
2604  if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2605  else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2606 
2607  // and fill histograms
2608  eff_bvss->SetBinContent( bini, effB );
2609  rej_bvss->SetBinContent( bini, 1.0-effB );
2610  }
2611  fEffS = 0;
2612 
2613  // create splines for histogram
2614  fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2615  }
2616 
2617  // must exist...
2618  if (0 == fSplTrainEffBvsS) return 0.0;
2619 
2620  // now find signal efficiency that corresponds to required background efficiency
2621  Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2622  Int_t nbins_ = 1000;
2623  for (Int_t bini=1; bini<=nbins_; bini++) {
2624 
2625  // get corresponding signal and background efficiencies
2626  effS = (bini - 0.5)/Float_t(nbins_);
2627  effB = fSplTrainEffBvsS->Eval( effS );
2628 
2629  // find signal efficiency that corresponds to required background efficiency
2630  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2631  effS_ = effS;
2632  effB_ = effB;
2633  }
2634 
2635  return 0.5*(effS + effS_); // the mean between bin above and bin below
2636 }
2637 
2638 ////////////////////////////////////////////////////////////////////////////////
2639 
2640 std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2641 {
2644  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2645 
2646  purity.push_back(resMulticlass->GetAchievablePur());
2647  return resMulticlass->GetAchievableEff();
2648 }
2649 
2650 ////////////////////////////////////////////////////////////////////////////////
2651 
2652 std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2653 {
2656  if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2657 
2658  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2659  for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2660  resMulticlass->GetBestMultiClassCuts(icls);
2661  }
2662 
2663  purity.push_back(resMulticlass->GetAchievablePur());
2664  return resMulticlass->GetAchievableEff();
2665 }
2666 
2667 ////////////////////////////////////////////////////////////////////////////////
2668 /// Construct a confusion matrix for a multiclass classifier. The confusion
2669 /// matrix compares, in turn, each class against all other classes in a pair-wise
2670 /// fashion. In rows with index \f$ k_r = 0 ... K \f$, \f$ k_r \f$ is
2671 /// considered signal for the sake of comparison and for each column
2672 /// \f$ k_c = 0 ... K \f$ the corresponding class is considered background.
2673 ///
2674 /// Note that the diagonal elements will be returned as NaN since this will
2675 /// compare a class against itself.
2676 ///
2677 /// \see TMVA::ResultsMulticlass::GetConfusionMatrix
2678 ///
2679 /// \param[in] effB The background efficiency for which to evaluate.
2680 /// \param[in] type The data set on which to evaluate (training, testing ...).
2681 ///
2682 /// \return A matrix containing signal efficiencies for the given background
2683 /// efficiency. The diagonal elements are NaN since this measure is
2684 /// meaningless (comparing a class against itself).
2685 ///
2686 
2688 {
2690  Log() << kFATAL << "Cannot get confusion matrix for non-multiclass analysis." << std::endl;
2691  return TMatrixD(0, 0);
2692  }
2693 
2694  Data()->SetCurrentType(type);
2695  ResultsMulticlass *resMulticlass =
2696  dynamic_cast<ResultsMulticlass *>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
2697 
2698  if (resMulticlass == nullptr) {
2699  Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2700  << "unable to create pointer in GetMulticlassEfficiency, exiting." << Endl;
2701  return TMatrixD(0, 0);
2702  }
2703 
2704  return resMulticlass->GetConfusionMatrix(effB);
2705 }
2706 
2707 ////////////////////////////////////////////////////////////////////////////////
2708 /// compute significance of mean difference
2709 /// \f[
2710 /// significance = \frac{|<S> - <B>|}{\sqrt{RMS_{S}^{2} + RMS_{B}^{2}}}
2711 /// \f]
2712 
2714 {
2715  Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2716 
2717  return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2718 }
2719 
2720 ////////////////////////////////////////////////////////////////////////////////
2721 /// compute "separation" defined as
2722 /// \f[
2723 /// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2724 /// \f]
2725 
2727 {
2728  return gTools().GetSeparation( histoS, histoB );
2729 }
2730 
2731 ////////////////////////////////////////////////////////////////////////////////
2732 /// compute "separation" defined as
2733 /// \f[
2734 /// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2735 /// \f]
2736 
2738 {
2739  // note, if zero pointers given, use internal pdf
2740  // sanity check first
2741  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2742  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2743  if (!pdfS) pdfS = fSplS;
2744  if (!pdfB) pdfB = fSplB;
2745 
2746  if (!fSplS || !fSplB) {
2747  Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2748  << " fSplS or fSplB are not yet filled" << Endl;
2749  return 0;
2750  }else{
2751  return gTools().GetSeparation( *pdfS, *pdfB );
2752  }
2753 }
2754 
2755 ////////////////////////////////////////////////////////////////////////////////
2756 /// calculate the area (integral) under the ROC curve as a
2757 /// overall quality measure of the classification
2758 
2760 {
2761  // note, if zero pointers given, use internal pdf
2762  // sanity check first
2763  if ((!histS && histB) || (histS && !histB))
2764  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2765 
2766  if (histS==0 || histB==0) return 0.;
2767 
2768  TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2769  TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2770 
2771 
2772  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2773  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2774 
2775  Double_t integral = 0;
2776  UInt_t nsteps = 1000;
2777  Double_t step = (xmax-xmin)/Double_t(nsteps);
2778  Double_t cut = xmin;
2779  for (UInt_t i=0; i<nsteps; i++) {
2780  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2781  cut+=step;
2782  }
2783  return integral*step;
2784 }
2785 
2786 
2787 ////////////////////////////////////////////////////////////////////////////////
2788 /// calculate the area (integral) under the ROC curve as a
2789 /// overall quality measure of the classification
2790 
2792 {
2793  // note, if zero pointers given, use internal pdf
2794  // sanity check first
2795  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2796  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2797  if (!pdfS) pdfS = fSplS;
2798  if (!pdfB) pdfB = fSplB;
2799 
2800  if (pdfS==0 || pdfB==0) return 0.;
2801 
2802  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2803  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2804 
2805  Double_t integral = 0;
2806  UInt_t nsteps = 1000;
2807  Double_t step = (xmax-xmin)/Double_t(nsteps);
2808  Double_t cut = xmin;
2809  for (UInt_t i=0; i<nsteps; i++) {
2810  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2811  cut+=step;
2812  }
2813  return integral*step;
2814 }
2815 
2816 ////////////////////////////////////////////////////////////////////////////////
2817 /// plot significance, \f$ \frac{S}{\sqrt{S + B}} \f$, curve for given number
2818 /// of signal and background events; returns cut for maximum significance
2819 /// also returned via reference is the maximum significance
2820 
2822  Double_t BackgroundEvents,
2823  Double_t& max_significance_value ) const
2824 {
2826 
2827  Double_t max_significance(0);
2828  Double_t effS(0),effB(0),significance(0);
2829  TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2830 
2831  if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2832  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2833  << "Number of signal or background events is <= 0 ==> abort"
2834  << Endl;
2835  }
2836 
2837  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2838  << SignalEvents/BackgroundEvents << Endl;
2839 
2840  TH1* eff_s = results->GetHist("MVA_EFF_S");
2841  TH1* eff_b = results->GetHist("MVA_EFF_B");
2842 
2843  if ( (eff_s==0) || (eff_b==0) ) {
2844  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2845  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2846  return 0;
2847  }
2848 
2849  for (Int_t bin=1; bin<=fNbinsH; bin++) {
2850  effS = eff_s->GetBinContent( bin );
2851  effB = eff_b->GetBinContent( bin );
2852 
2853  // put significance into a histogram
2854  significance = sqrt(SignalEvents)*( effS )/sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
2855 
2856  temp_histogram->SetBinContent(bin,significance);
2857  }
2858 
2859  // find maximum in histogram
2860  max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
2861  max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
2862 
2863  // delete
2864  delete temp_histogram;
2865 
2866  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
2867  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
2868 
2869  return max_significance;
2870 }
2871 
2872 ////////////////////////////////////////////////////////////////////////////////
2873 /// calculates rms,mean, xmin, xmax of the event variable
2874 /// this can be either done for the variables as they are or for
2875 /// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
2876 
2877 void TMVA::MethodBase::Statistics( Types::ETreeType treeType, const TString& theVarName,
2878  Double_t& meanS, Double_t& meanB,
2879  Double_t& rmsS, Double_t& rmsB,
2880  Double_t& xmin, Double_t& xmax )
2881 {
2882  Types::ETreeType previousTreeType = Data()->GetCurrentType();
2883  Data()->SetCurrentType(treeType);
2884 
2885  Long64_t entries = Data()->GetNEvents();
2886 
2887  // sanity check
2888  if (entries <=0)
2889  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;
2890 
2891  // index of the wanted variable
2892  UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
2893 
2894  // first fill signal and background in arrays before analysis
2895  xmin = +DBL_MAX;
2896  xmax = -DBL_MAX;
2897  Long64_t nEventsS = -1;
2898  Long64_t nEventsB = -1;
2899 
2900  // take into account event weights
2901  meanS = 0;
2902  meanB = 0;
2903  rmsS = 0;
2904  rmsB = 0;
2905  Double_t sumwS = 0, sumwB = 0;
2906 
2907  // loop over all training events
2908  for (Int_t ievt = 0; ievt < entries; ievt++) {
2909 
2910  const Event* ev = GetEvent(ievt);
2911 
2912  Double_t theVar = ev->GetValue(varIndex);
2913  Double_t weight = ev->GetWeight();
2914 
2915  if (DataInfo().IsSignal(ev)) {
2916  sumwS += weight;
2917  meanS += weight*theVar;
2918  rmsS += weight*theVar*theVar;
2919  }
2920  else {
2921  sumwB += weight;
2922  meanB += weight*theVar;
2923  rmsB += weight*theVar*theVar;
2924  }
2925  xmin = TMath::Min( xmin, theVar );
2926  xmax = TMath::Max( xmax, theVar );
2927  }
2928  ++nEventsS;
2929  ++nEventsB;
2930 
2931  meanS = meanS/sumwS;
2932  meanB = meanB/sumwB;
2933  rmsS = TMath::Sqrt( rmsS/sumwS - meanS*meanS );
2934  rmsB = TMath::Sqrt( rmsB/sumwB - meanB*meanB );
2935 
2936  Data()->SetCurrentType(previousTreeType);
2937 }
2938 
2939 ////////////////////////////////////////////////////////////////////////////////
2940 /// create reader class for method (classification only at present)
2941 
2942 void TMVA::MethodBase::MakeClass( const TString& theClassFileName ) const
2943 {
2944  // the default consists of
2945  TString classFileName = "";
2946  if (theClassFileName == "")
2947  classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
2948  else
2949  classFileName = theClassFileName;
2950 
2951  TString className = TString("Read") + GetMethodName();
2952 
2953  TString tfname( classFileName );
2954  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
2955  << "Creating standalone class: "
2956  << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
2957 
2958  std::ofstream fout( classFileName );
2959  if (!fout.good()) { // file could not be opened --> Error
2960  Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
2961  }
2962 
2963  // now create the class
2964  // preamble
2965  fout << "// Class: " << className << std::endl;
2966  fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
2967 
2968  // print general information and configuration state
2969  fout << std::endl;
2970  fout << "/* configuration options =====================================================" << std::endl << std::endl;
2971  WriteStateToStream( fout );
2972  fout << std::endl;
2973  fout << "============================================================================ */" << std::endl;
2974 
2975  // generate the class
2976  fout << "" << std::endl;
2977  fout << "#include <array>" << std::endl;
2978  fout << "#include <vector>" << std::endl;
2979  fout << "#include <cmath>" << std::endl;
2980  fout << "#include <string>" << std::endl;
2981  fout << "#include <iostream>" << std::endl;
2982  fout << "" << std::endl;
2983  // now if the classifier needs to write some additional classes for its response implementation
2984  // this code goes here: (at least the header declarations need to come before the main class
2985  this->MakeClassSpecificHeader( fout, className );
2986 
2987  fout << "#ifndef IClassifierReader__def" << std::endl;
2988  fout << "#define IClassifierReader__def" << std::endl;
2989  fout << std::endl;
2990  fout << "class IClassifierReader {" << std::endl;
2991  fout << std::endl;
2992  fout << " public:" << std::endl;
2993  fout << std::endl;
2994  fout << " // constructor" << std::endl;
2995  fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
2996  fout << " virtual ~IClassifierReader() {}" << std::endl;
2997  fout << std::endl;
2998  fout << " // return classifier response" << std::endl;
2999  fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3000  fout << std::endl;
3001  fout << " // returns classifier status" << std::endl;
3002  fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3003  fout << std::endl;
3004  fout << " protected:" << std::endl;
3005  fout << std::endl;
3006  fout << " bool fStatusIsClean;" << std::endl;
3007  fout << "};" << std::endl;
3008  fout << std::endl;
3009  fout << "#endif" << std::endl;
3010  fout << std::endl;
3011  fout << "class " << className << " : public IClassifierReader {" << std::endl;
3012  fout << std::endl;
3013  fout << " public:" << std::endl;
3014  fout << std::endl;
3015  fout << " // constructor" << std::endl;
3016  fout << " " << className << "( std::vector<std::string>& theInputVars ) " << std::endl;
3017  fout << " : IClassifierReader()," << std::endl;
3018  fout << " fClassName( \"" << className << "\" )," << std::endl;
3019  fout << " fNvars( " << GetNvar() << " )," << std::endl;
3020  fout << " fIsNormalised( " << (IsNormalised() ? "true" : "false") << " )" << std::endl;
3021  fout << " { " << std::endl;
3022  fout << " // the training input variables" << std::endl;
3023  fout << " const char* inputVars[] = { ";
3024  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3025  fout << "\"" << GetOriginalVarName(ivar) << "\"";
3026  if (ivar<GetNvar()-1) fout << ", ";
3027  }
3028  fout << " };" << std::endl;
3029  fout << std::endl;
3030  fout << " // sanity checks" << std::endl;
3031  fout << " if (theInputVars.size() <= 0) {" << std::endl;
3032  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3033  fout << " fStatusIsClean = false;" << std::endl;
3034  fout << " }" << std::endl;
3035  fout << std::endl;
3036  fout << " if (theInputVars.size() != fNvars) {" << std::endl;
3037  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3038  fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3039  fout << " fStatusIsClean = false;" << std::endl;
3040  fout << " }" << std::endl;
3041  fout << std::endl;
3042  fout << " // validate input variables" << std::endl;
3043  fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3044  fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3045  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3046  fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3047  fout << " fStatusIsClean = false;" << std::endl;
3048  fout << " }" << std::endl;
3049  fout << " }" << std::endl;
3050  fout << std::endl;
3051  fout << " // initialize min and max vectors (for normalisation)" << std::endl;
3052  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3053  fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
3054  fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
3055  }
3056  fout << std::endl;
3057  fout << " // initialize input variable types" << std::endl;
3058  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3059  fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3060  }
3061  fout << std::endl;
3062  fout << " // initialize constants" << std::endl;
3063  fout << " Initialize();" << std::endl;
3064  fout << std::endl;
3065  if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3066  fout << " // initialize transformation" << std::endl;
3067  fout << " InitTransform();" << std::endl;
3068  }
3069  fout << " }" << std::endl;
3070  fout << std::endl;
3071  fout << " // destructor" << std::endl;
3072  fout << " virtual ~" << className << "() {" << std::endl;
3073  fout << " Clear(); // method-specific" << std::endl;
3074  fout << " }" << std::endl;
3075  fout << std::endl;
3076  fout << " // the classifier response" << std::endl;
3077  fout << " // \"inputValues\" is a vector of input values in the same order as the " << std::endl;
3078  fout << " // variables given to the constructor" << std::endl;
3079  fout << " double GetMvaValue( const std::vector<double>& inputValues ) const;" << std::endl;
3080  fout << std::endl;
3081  fout << " private:" << std::endl;
3082  fout << std::endl;
3083  fout << " // method-specific destructor" << std::endl;
3084  fout << " void Clear();" << std::endl;
3085  fout << std::endl;
3086  if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3087  fout << " // input variable transformation" << std::endl;
3088  GetTransformationHandler().MakeFunction(fout, className,1);
3089  fout << " void InitTransform();" << std::endl;
3090  fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3091  fout << std::endl;
3092  }
3093  fout << " // common member variables" << std::endl;
3094  fout << " const char* fClassName;" << std::endl;
3095  fout << std::endl;
3096  fout << " const size_t fNvars;" << std::endl;
3097  fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
3098  fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3099  fout << std::endl;
3100  fout << " // normalisation of input variables" << std::endl;
3101  fout << " const bool fIsNormalised;" << std::endl;
3102  fout << " bool IsNormalised() const { return fIsNormalised; }" << std::endl;
3103  fout << " double fVmin[" << GetNvar() << "];" << std::endl;
3104  fout << " double fVmax[" << GetNvar() << "];" << std::endl;
3105  fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3106  fout << " // normalise to output range: [-1, 1]" << std::endl;
3107  fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3108  fout << " }" << std::endl;
3109  fout << std::endl;
3110  fout << " // type of input variable: 'F' or 'I'" << std::endl;
3111  fout << " char fType[" << GetNvar() << "];" << std::endl;
3112  fout << std::endl;
3113  fout << " // initialize internal variables" << std::endl;
3114  fout << " void Initialize();" << std::endl;
3115  fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3116  fout << "" << std::endl;
3117  fout << " // private members (method specific)" << std::endl;
3118 
3119  // call the classifier specific output (the classifier must close the class !)
3120  MakeClassSpecific( fout, className );
3121 
3122  fout << " inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3123  fout << " {" << std::endl;
3124  fout << " // classifier response value" << std::endl;
3125  fout << " double retval = 0;" << std::endl;
3126  fout << std::endl;
3127  fout << " // classifier response, sanity check first" << std::endl;
3128  fout << " if (!IsStatusClean()) {" << std::endl;
3129  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3130  fout << " << \" because status is dirty\" << std::endl;" << std::endl;
3131  fout << " retval = 0;" << std::endl;
3132  fout << " }" << std::endl;
3133  fout << " else {" << std::endl;
3134  fout << " if (IsNormalised()) {" << std::endl;
3135  fout << " // normalise variables" << std::endl;
3136  fout << " std::vector<double> iV;" << std::endl;
3137  fout << " iV.reserve(inputValues.size());" << std::endl;
3138  fout << " int ivar = 0;" << std::endl;
3139  fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3140  fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3141  fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3142  fout << " }" << std::endl;
3143  if (GetTransformationHandler().GetTransformationList().GetSize()!=0 &&
3146  fout << " Transform( iV, -1 );" << std::endl;
3147  }
3148  fout << " retval = GetMvaValue__( iV );" << std::endl;
3149  fout << " }" << std::endl;
3150  fout << " else {" << std::endl;
3151  if (GetTransformationHandler().GetTransformationList().GetSize()!=0 &&
3154  fout << " std::vector<double> iV;" << std::endl;
3155  fout << " int ivar = 0;" << std::endl;
3156  fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3157  fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3158  fout << " iV.push_back(*varIt);" << std::endl;
3159  fout << " }" << std::endl;
3160  fout << " Transform( iV, -1 );" << std::endl;
3161  fout << " retval = GetMvaValue__( iV );" << std::endl;
3162  }
3163  else {
3164  fout << " retval = GetMvaValue__( inputValues );" << std::endl;
3165  }
3166  fout << " }" << std::endl;
3167  fout << " }" << std::endl;
3168  fout << std::endl;
3169  fout << " return retval;" << std::endl;
3170  fout << " }" << std::endl;
3171 
3172  // create output for transformation - if any
3173  if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3174  GetTransformationHandler().MakeFunction(fout, className,2);
3175 
3176  // close the file
3177  fout.close();
3178 }
3179 
3180 ////////////////////////////////////////////////////////////////////////////////
3181 /// prints out method-specific help method
3182 
3184 {
3185  // if options are written to reference file, also append help info
3186  std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3187  std::ofstream* o = 0;
3188  if (gConfig().WriteOptionsReference()) {
3189  Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3190  o = new std::ofstream( GetReferenceFile(), std::ios::app );
3191  if (!o->good()) { // file could not be opened --> Error
3192  Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3193  }
3194  std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3195  }
3196 
3197  // "|--------------------------------------------------------------|"
3198  if (!o) {
3199  Log() << kINFO << Endl;
3200  Log() << gTools().Color("bold")
3201  << "================================================================"
3202  << gTools().Color( "reset" )
3203  << Endl;
3204  Log() << gTools().Color("bold")
3205  << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3206  << gTools().Color( "reset" )
3207  << Endl;
3208  }
3209  else {
3210  Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3211  }
3212 
3213  // print method-specific help message
3214  GetHelpMessage();
3215 
3216  if (!o) {
3217  Log() << Endl;
3218  Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3219  Log() << gTools().Color("bold")
3220  << "================================================================"
3221  << gTools().Color( "reset" )
3222  << Endl;
3223  Log() << Endl;
3224  }
3225  else {
3226  // indicate END
3227  Log() << "# End of Message___" << Endl;
3228  }
3229 
3230  std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3231  if (o) o->close();
3232 }
3233 
3234 // ----------------------- r o o t f i n d i n g ----------------------------
3235 
3236 ////////////////////////////////////////////////////////////////////////////////
3237 /// returns efficiency as function of cut
3238 
3240 {
3241  Double_t retval=0;
3242 
3243  // retrieve the class object
3244  if (Use_Splines_for_Eff_) {
3245  retval = fSplRefS->Eval( theCut );
3246  }
3247  else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3248 
3249  // caution: here we take some "forbidden" action to hide a problem:
3250  // in some cases, in particular for likelihood, the binned efficiency distributions
3251  // do not equal 1, at xmin, and 0 at xmax; of course, in principle we have the
3252  // unbinned information available in the trees, but the unbinned minimization is
3253  // too slow, and we don't need to do a precision measurement here. Hence, we force
3254  // this property.
3255  Double_t eps = 1.0e-5;
3256  if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3257  else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3258 
3259  return retval;
3260 }
3261 
3262 ////////////////////////////////////////////////////////////////////////////////
3263 /// returns the event collection (i.e. the dataset) TRANSFORMED using the
3264 /// classifiers specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3265 
3267 {
3268  // if there's no variable transformation for this classifier, just hand back the
3269  // event collection of the data set
3270  if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3271  return (Data()->GetEventCollection(type));
3272  }
3273 
3274  // otherwise, transform ALL the events and hand back the vector of the pointers to the
3275  // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3276  // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3277  Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3278  if (fEventCollections.at(idx) == 0) {
3279  fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3281  }
3282  return *(fEventCollections.at(idx));
3283 }
3284 
3285 ////////////////////////////////////////////////////////////////////////////////
3286 /// calculates the TMVA version string from the training version code on the fly
3287 
3289 {
3290  UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3291  UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3292  UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3293 
3294  return TString(Form("%i.%i.%i",a,b,c));
3295 }
3296 
3297 ////////////////////////////////////////////////////////////////////////////////
3298 /// calculates the ROOT version string from the training version code on the fly
3299 
3301 {
3302  UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3303  UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3304  UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3305 
3306  return TString(Form("%i.%02i/%02i",a,b,c));
3307 }
3308 
3309 ////////////////////////////////////////////////////////////////////////////////
3310 
3312  ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
3314 
3315  if (mvaRes != NULL) {
3316  TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3317  TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3318  TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3319  TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3320 
3321  if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3322 
3323  if (SorB == 's' || SorB == 'S')
3324  return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3325  else
3326  return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3327  }
3328  return -1;
3329 }
virtual void DeclareOptions()=0
Bool_t HasMVAPdfs() const
Definition: MethodBase.h:424
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:582
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
TString fMethodName
Definition: MethodBase.h:602
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:928
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition: TH1.cxx:3565
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:6063
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3244
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables, variable transformation type etc.
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument. This is just a dummy ...
Definition: MethodBase.cxx:649
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
virtual Double_t GetMaximum(Double_t maxval=FLT_MAX) const
Return maximum value smaller than maxval of bins in the range, unless the value has been overridden b...
Definition: TH1.cxx:7807
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
TXMLEngine & xmlengine()
Definition: Tools.h:270
float xmin
Definition: THbookFile.cxx:93
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8397
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodBase.h:222
#define TMVA_VERSION_CODE
Definition: Version.h:47
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:99
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ReadOptionsFromXML(void *node)
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:964
Singleton class for Global types used by TMVA.
Definition: Types.h:73
long long Long64_t
Definition: RtypesCore.h:69
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis ...
Class that is the base-class for a vector of result.
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Definition: MethodBase.cxx:212
Bool_t fIgnoreNegWeightsInTraining
Definition: MethodBase.h:669
virtual const char * WorkingDirectory()
Return working directory.
Definition: TSystem.cxx:869
void ReadStateFromXML(void *parent)
std::vector< VariableInfo > & GetSpectatorInfos()
Definition: DataSetInfo.h:104
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream ...
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
TLine * line
virtual void MakeClassSpecificHeader(std::ostream &, const TString &="") const
Definition: MethodBase.h:510
Collectable string class.
Definition: TObjString.h:28
TSpline1 * fSplTrainRefS
Definition: MethodBase.h:692
float Float_t
Definition: RtypesCore.h:53
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
std::vector< TGraph * > fGraphs
Definition: MethodBase.h:104
void ReadOptionsFromStream(std::istream &istr)
read option back from the weight file
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:638
UInt_t GetNvar() const
Definition: MethodBase.h:333
TH1 * GetSmoothedHist() const
Definition: PDF.h:95
virtual const char * GetBuildNode() const
Return the build node name.
Definition: TSystem.cxx:3792
void BuildPDF(const TH1 *theHist)
Definition: PDF.cxx:259
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
const TString & GetOriginalVarName(Int_t ivar) const
Definition: MethodBase.h:498
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:628
TString fWeightFile
Definition: MethodBase.h:625
TString fVariableTransformTypeString
Definition: MethodBase.h:711
Config & gConfig()
void PlotVariables(const std::vector< Event *> &events, TDirectory *theDirectory=0)
create histograms from the input variables
MsgLogger & Log() const
Definition: Configurable.h:122
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
TransformationHandler * fTransformationPointer
Definition: MethodBase.h:658
Types::ESBType fVariableTransformType
Definition: MethodBase.h:598
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4763
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
virtual Double_t Eval(Double_t x) const
returns linearly interpolated TGraph entry around x
Definition: TSpline1.cxx:61
EAnalysisType
Definition: Types.h:125
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:35
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum=0)
Compute Quantiles for this histogram Quantile x_q of a probability distribution Function F is defined...
Definition: TH1.cxx:4315
TH1 * GetPDFHist() const
Definition: PDF.h:92
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:825
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6892
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:585
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodBase.h:216
const TString & GetReferenceFile() const
Definition: Configurable.h:102
Basic string class.
Definition: TString.h:125
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition: TH1.cxx:705
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:567
void SetTrainTime(Double_t trainTime)
Definition: MethodBase.h:157
TMultiGraph * fMultiGraph
Definition: MethodBase.h:101
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition: Tools.cxx:600
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:383
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1099
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
int Int_t
Definition: RtypesCore.h:41
virtual void SetYTitle(const char *title)
Definition: TH1.h:406
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
bool Bool_t
Definition: RtypesCore.h:59
virtual void TestMulticlass()
test multiclass classification
TString fJobName
Definition: MethodBase.h:601
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
TSpline1 * fSplRefB
Definition: MethodBase.h:690
UInt_t GetNTargets() const
Definition: MethodBase.h:335
TSpline1 * fSplRefS
Definition: MethodBase.h:689
TMatrixD GetConfusionMatrix(Double_t effB)
Returns a confusion matrix where each class is pitted against each other.
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:225
std::vector< TString > * fInputVars
Definition: MethodBase.h:575
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
Definition: MethodBase.cxx:729
#define ROOT_VERSION_CODE
Definition: RVersion.h:21
void ReadTargetsFromXML(void *tarnode)
read target info from XML
virtual void Init()=0
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance curve for given number of signal and background events; returns cut for maximum ...
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:308
Double_t fTrainTime
Definition: MethodBase.h:682
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored ...
Double_t GetTrainingSumSignalWeights()
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1135
Double_t fTestTime
Definition: MethodBase.h:683
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
Short_t Abs(Short_t d)
Definition: TMathBase.h:108
MsgLogger * fLogger
Definition: Configurable.h:128
Double_t GetTrainTime() const
Definition: MethodBase.h:158
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:847
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1225
void ReadVariablesFromXML(void *varnode)
read variable info from XML
const TString & GetExpression() const
Definition: VariableInfo.h:57
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3950
const TString & GetWeightFileDir() const
Definition: MethodBase.h:479
UInt_t fSignalClass
Definition: MethodBase.h:676
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
double sqrt(double)
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:392
Class that contains all the information of a class.
Definition: ClassInfo.h:49
Double_t GetXmin() const
Definition: TAxis.h:133
char GetVarType() const
Definition: VariableInfo.h:61
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:594
UInt_t TreeIndex(Types::ETreeType type) const
Definition: DataSet.h:190
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2365
ECutOrientation fCutOrientation
Definition: MethodBase.h:686
virtual ~MethodBase()
destructor
Definition: MethodBase.cxx:369
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
UInt_t GetTrainingTMVAVersionCode() const
Definition: MethodBase.h:378
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
Definition: MethodBase.cxx:887
void AddXMLTo(void *parent=0) const
XML node describing the transformation.
const Event * GetEvent() const
Definition: MethodBase.h:738
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
Definition: MethodBase.cxx:242
DataSet * Data() const
Definition: MethodBase.h:398
void ClearGraphs()
This function sets the point number to 0 for all graphs.
Definition: MethodBase.cxx:198
Bool_t CheckSplines(const TH1 *, const TSpline *)
check quality of splining by comparing splines and histograms in each bin
Definition: Tools.cxx:490
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
void ReadStateFromFile()
Function to read options and weights from file.
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:203
std::vector< Float_t > * GetValueVector()
void SetCallerName(const TString &name)
~IPythonInteractive()
standard destructor
Definition: MethodBase.cxx:159
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:861
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity: $R(x) = \int_{-\infty}^{x} PDF(x')\,dx'$, where PDF(x) is the PDF of the classifier's signal or background distribution ...
void PrintHelpMessage() const
prints out method-specific help method
Double_t fMeanB
Definition: MethodBase.h:649
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
Definition: TString.cxx:2231
IONames & GetIONames()
Definition: Config.h:85
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:394
virtual void ParseOptions()
options parser
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:345
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:411
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
Definition: MethodBase.cxx:174
DataSetInfo & DataInfo() const
Definition: MethodBase.h:399
void SetOptions(const TString &s)
Definition: Configurable.h:85
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition: TSystem.cxx:1574
Bool_t DoRegression() const
Definition: MethodBase.h:427
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:72
TString fTestvar
Definition: MethodBase.h:604
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:477
Bool_t DoesExist(const TString &alias) const
Definition: Results.cxx:121
Class that contains all the data information.
Definition: DataSetInfo.h:60
TFile * GetFile() const
Definition: MethodBase.h:359
virtual void ProcessOptions()=0
virtual Double_t GetProba(const Event *ev)
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
TH1F * h1
Definition: legend1.C:5
TSpline * fSpleffBvsS
Definition: MethodBase.h:639
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
Definition: TH1.cxx:1200
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
std::vector< VariableInfo > & GetTargetInfos()
Definition: DataSetInfo.h:99
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
Bool_t fModelPersistence
Definition: MethodBase.h:620
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Definition: RootFinder.cxx:72
virtual void AddWeightsXMLTo(void *parent) const =0
UInt_t fTMVATrainingVersion
Definition: MethodBase.h:605
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:406
const std::vector< Event * > * CalcTransformations(const std::vector< Event *> &, Bool_t createNewVector=kFALSE)
computation of transformation
A doubly linked list.
Definition: TList.h:44
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:134
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:346
TransformationHandler fTransformation
Definition: MethodBase.h:659
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
Bool_t DoMulticlass() const
Definition: MethodBase.h:428
virtual Double_t KolmogorovTest(const TH1 *h2, Option_t *option="") const
Statistical test of compatibility in shape between this histogram and h2, using Kolmogorov test...
Definition: TH1.cxx:7490
virtual void MakeClassSpecific(std::ostream &, const TString &="") const
Definition: MethodBase.h:507
virtual void ReadWeightsFromXML(void *wghtnode)=0
Int_t GetHistNBins(Int_t evtNum=0)
Definition: PDF.cxx:303
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file if layout<=0, no any spaces or newlines will be placed between xmlnode...
TString fWeightFileExtension
Definition: Config.h:108
TString fUser
Definition: TSystem.h:142
virtual void Train()=0
void * GetExternalLink() const
Definition: VariableInfo.h:81
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:97
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables, variable transformation type etc.
Double_t fRmsB
Definition: MethodBase.h:651
Double_t fXmin
Definition: MethodBase.h:652
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
Linear interpolation class.
TSpline1 * fSplTrainRefB
Definition: MethodBase.h:693
ROOT::R::TRInterface & r
Definition: Object.C:4
Class to manage histogram axis.
Definition: TAxis.h:30
R__EXTERN TSystem * gSystem
Definition: TSystem.h:540
TDirectory * fMethodBaseDir
Definition: MethodBase.h:613
SVector< double, 2 > v
Definition: Dict.h:5
UInt_t fROOTTrainingVersion
Definition: MethodBase.h:606
const char * GetName() const
Definition: MethodBase.h:323
ClassInfo * GetClassInfo(Int_t clNum) const
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream...
void AddClassesXMLTo(void *parent) const
write class info to XML
auto * a
Definition: textangle.C:12
virtual void ReadTransformationFromStream(std::istream &istr, const TString &classname="")=0
const Int_t NBIN_HIST_HIGH
Definition: MethodBase.cxx:137
2-D histogram with a float per channel (see TH1 documentation)
Definition: TH2.h:249
class TMVA::Config::VariablePlotting fVariablePlotting
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:561
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8477
Float_t GetAchievablePur(UInt_t cls)
void SetReadingVersion(UInt_t rv)
Definition: PDF.h:119
void SetValue(Float_t value, Int_t ievt)
set MVA response
UInt_t GetTrainingROOTVersionCode() const
Definition: MethodBase.h:379
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fMeanS
Definition: MethodBase.h:648
Double_t ElapsedSeconds(void)
computes elapsed time in seconds
Definition: Timer.cxx:125
char * Form(const char *fmt,...)
Ssiz_t Length() const
Definition: TString.h:386
void ReadFromXML(void *varnode)
read VariableInfo from stream
Bool_t Help() const
Definition: MethodBase.h:491
Int_t fNsmoothMVAPdf
Definition: MethodBase.h:714
Bool_t fTxtWeightsOnly
Definition: MethodBase.h:712
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1080
const TString & GetJobName() const
Definition: MethodBase.h:319
Double_t GetXmin() const
Definition: PDF.h:104
const TString & GetMethodName() const
Definition: MethodBase.h:320
Linear interpolation class.
TDirectory * fBaseDir
Definition: MethodBase.h:612
UInt_t GetNSpectators(bool all=kTRUE) const
virtual Double_t Eval(Double_t x) const =0
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:290
Bool_t fHasMVAPdfs
Definition: MethodBase.h:667
TSpline * fSplTrainEffBvsS
Definition: MethodBase.h:643
static constexpr double m2
float xmax
Definition: THbookFile.cxx:93
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:354
Tools & gTools()
Gaussian Transformation of input variables.
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
Definition: MethodBase.cxx:514
void BuildTransformationFromVarInfo(const std::vector< TMVA::VariableInfo > &var)
this method is only used when building a normalization transformation from old text files in this cas...
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:610
Bool_t IsSilentFile()
Definition: MethodBase.h:368
virtual Double_t GetSignificance() const
compute significance of mean difference
Linear interpolation class.
void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part) const
create transformation function
TString GetWeightFileName() const
retrieve weight file name
Linear interpolation of TGraph.
Definition: TSpline1.h:43
Double_t GetSignalReferenceCutOrientation() const
Definition: MethodBase.h:350
REAL epsilon
Definition: triangle.c:617
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodBase.cxx:545
Int_t FindVarIndex(const TString &) const
find variable by name
UInt_t GetNVariables() const
Definition: MethodBase.h:334
std::vector< const std::vector< TMVA::Event * > * > fEventCollections
Definition: MethodBase.h:695
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
const Bool_t kFALSE
Definition: RtypesCore.h:88
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
TString fVerbosityLevelString
Definition: MethodBase.h:664
Double_t fRmsS
Definition: MethodBase.h:650
UInt_t fBackgroundClass
Definition: MethodBase.h:677
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: PDF.cxx:823
TList * GetStorage() const
Definition: Results.h:73
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:401
int Ssiz_t
Definition: RtypesCore.h:63
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:438
#define ClassImp(name)
Definition: Rtypes.h:359
void SetTestTime(Double_t testTime)
Definition: MethodBase.h:161
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:749
double Double_t
Definition: RtypesCore.h:55
std::vector< Double_t > GetBestMultiClassCuts(UInt_t targetClass)
calculate the best working point (optimal cut values) for the multiclass classifier ...
void SetWeightFileName(TString)
set the weight file name (deprecated)
virtual Double_t GetSeparation(TH1 *, TH1 *) const
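compute "separation" defined as $\langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)}\,dx$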
Double_t GetXmax() const
Definition: PDF.h:105
Describe directory structure in memory.
Definition: TDirectory.h:34
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:585
Bool_t IsNormalised() const
Definition: MethodBase.h:483
Double_t GetTrainingSumBackgrWeights()
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:130
int type
Definition: TGX11.cxx:120
Class which takes the results of a multiclass classification.
static RooMathCoreReg dummy
virtual void GetHelpMessage() const =0
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1173
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
void AddVarsXMLTo(void *parent) const
write variable info to XML
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:570
The TH1 histogram class.
Definition: TH1.h:56
const Bool_t Use_Splines_for_Eff_
Definition: MethodBase.cxx:134
static constexpr double s
const char * AsString() const
Return the date & time as a string (ctime() format).
Definition: TDatime.cxx:101
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:96
void AddPreDefVal(const T &)
Definition: Configurable.h:168
IPythonInteractive()
standard constructor
Definition: MethodBase.cxx:151
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file; dummy implementation here ...
ClassInfo * AddClass(const TString &className)
void AddXMLTo(void *parent)
XML file writing.
Definition: PDF.cxx:922
UInt_t GetNumber() const
Definition: ClassInfo.h:65
Bool_t fConstructedFromWeightFile
Definition: MethodBase.h:607
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:428
TString fVarTransformString
Definition: MethodBase.h:656
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:788
void ComputeStat(const std::vector< TMVA::Event *> &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:214
const TString & GetOptions() const
Definition: Configurable.h:84
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
void SetConfigName(const char *n)
Definition: Configurable.h:63
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
void ValidatePDF(TH1 *original=0) const
comparison of original histogram with reference PDF
Definition: PDF.cxx:581
Float_t GetAchievableEff(UInt_t cls)
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
Types::EMVA fMethodType
Definition: MethodBase.h:603
char Char_t
Definition: RtypesCore.h:29
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition: RootFinder.h:48
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
Ranking * fRanking
Definition: MethodBase.h:574
virtual void SetXTitle(const char *title)
Definition: TH1.h:405
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
Definition: MethodBase.cxx:969
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:497
void ReadFromStream(std::istream &istr)
read VariableInfo from stream
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
TString GetMethodTypeName() const
Definition: MethodBase.h:321
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:601
void ProcessOptions()
Definition: PDF.cxx:873
Class that is the base-class for a vector of result.
Definition: Results.h:57
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
void AddToXML(void *varnode)
write class to XML
Double_t fSignalReferenceCut
the data set information (sometimes needed)
Definition: MethodBase.h:596
const Event * fTmpEvent
Definition: MethodBase.h:401
void SetWeightFileDir(TString fileDir)
set directory of weight file
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:349
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
Definition: TH1.cxx:8276
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
TH1 * GetOriginalHist() const
Definition: PDF.h:94
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
Definition: TDirectory.cxx:400
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:412
Int_t fNbinsMVAoutput
Definition: MethodBase.h:579
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:284
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
virtual Double_t GetTrainingEfficiency(const TString &)
void AddOptionsXMLTo(void *parent) const
write options to XML file
Bool_t fSilentFile
Definition: MethodBase.h:618
Double_t GetIntegral(Double_t xmin, Double_t xmax)
computes PDF integral within given ranges
Definition: PDF.cxx:657
Double_t fXmax
Definition: MethodBase.h:653
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1975
Bool_t IsSignal(const Event *ev) const
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:190
std::vector< Bool_t > * GetValueVectorTypes()
ECutOrientation GetCutOrientation() const
Definition: MethodBase.h:539
void InitBase()
default initialization called by all constructors
Definition: MethodBase.cxx:446
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:584
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:426
void AddTargetsXMLTo(void *parent) const
write target info to XML
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:86
virtual Int_t GetNbinsX() const
Definition: TH1.h:291
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
Double_t Sqrt(Double_t x)
Definition: TMath.h:590
Class for type info of MVA input variable.
Definition: VariableInfo.h:47
const TString & GetTestvarName() const
Definition: MethodBase.h:324
virtual Int_t GetSize() const
Definition: TCollection.h:180
virtual void ReadWeightsFromStream(std::istream &)=0
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:291
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:330
TString fFileDir
Definition: MethodBase.h:624
const Bool_t kTRUE
Definition: RtypesCore.h:87
Types::EMVA GetMethodType() const
Definition: MethodBase.h:322
Double_t GetXmax() const
Definition: TAxis.h:134
void CheckForUnusedOptions() const
checks for unused options in option string
virtual Int_t GetMaximumBin() const
Return location of bin with maximum value in the range.
Definition: TH1.cxx:7837
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
virtual void TestClassification()
initialization
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write all objects in this collection.
const Event * GetEvent() const
Definition: DataSet.cxx:202
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:94
void SetExternalLink(void *p)
Definition: VariableInfo.h:73
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:425
char name[80]
Definition: TGX11.cxx:109
Class that is the base-class for a vector of results.
Bool_t fSetupCompleted
Definition: MethodBase.h:698
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more information.
Definition: TH1.h:315
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:829
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:353
void WriteOptionsToStream(std::ostream &o, const TString &prefix) const
write options to output stream (e.g. in writing the MVA weight files)
void Resize(Ssiz_t n)
Resize the string. Truncate or add blanks as necessary.
Definition: TString.cxx:1069
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130 124559).
Definition: TDatime.h:37
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition: PDF.cxx:704
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)
void SetConfigDescription(const char *d)
Definition: Configurable.h:64
Double_t GetSeparation(TH1 *S, TH1 *B) const
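compute "separation" defined as $\langle S^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)} \, dx$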
Definition: Tools.cxx:133
virtual void Close(Option_t *option="")
Close a file.
Definition: TFile.cxx:916
const char * Data() const
Definition: TString.h:345