MethodBase.cxx
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21  * *
22  * Copyright (c) 2005-2011: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  * *
32  **********************************************************************************/
33 
34 /*! \class TMVA::MethodBase
35 \ingroup TMVA
36 
37  Virtual base class for all MVA methods
38 
39  MethodBase hosts several specific evaluation methods.
40 
41  The kind of MVA that provides optimal performance in an analysis strongly
42  depends on the particular application. The evaluation factory provides a
43  number of numerical benchmark results to directly assess the performance
44  of the MVA training on the independent test sample. These are:
45 
46  - The _signal efficiency_ at three representative background efficiencies
47  (the background efficiency is 1 &minus; the background rejection).
48  - The _significance_ of an MVA estimator, defined by the difference
49  between the MVA mean values for signal and background, divided by the
50  quadratic sum of their root mean squares (written out as a formula below).
51  - The _separation_ of an MVA _x_, defined by the integral
52  \f[
53  \frac{1}{2} \int \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx
54  \f]
55  where
56  \f$ S(x) \f$ and \f$ B(x) \f$ are the signal and background distributions,
57  respectively. The separation is zero for identical signal and background MVA
58  shapes, and it is one for fully disjoint shapes (a computation sketch is given below).
59  - The average, \f$ \int x \mu (S(x)) dx \f$, of the signal \f$ \mu_{transform} \f$.
60  The \f$ \mu_{transform} \f$ of an MVA denotes the transformation that yields
61  a uniform background distribution. In this way, the signal distributions
62  \f$ S(x) \f$ can be directly compared among the various MVAs. The stronger
63  \f$ S(x) \f$ peaks towards one, the better the discrimination power of the MVA.
64  The \f$ \mu_{transform} \f$ is
65  [documented here](http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html).
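 For reference, the significance defined above can be written as
 \f[
 \mathrm{significance} = \frac{\left| \langle x \rangle_S - \langle x \rangle_B \right|}{\sqrt{\mathrm{RMS}_S^2 + \mathrm{RMS}_B^2}}
 \f]
 and the separation can be estimated from two normalised response histograms with the
 TMVA::Tools helper, as in this minimal sketch (the file and histogram names are
 illustrative only and depend on the booked method and dataset):
 ~~~ {.cpp}
 TFile* f  = TFile::Open( "TMVA.root" );   // assumed TMVA output file
 TH1*   hS = (TH1*) f->Get( "dataset/Method_BDT/BDT/MVA_BDT_S" );
 TH1*   hB = (TH1*) f->Get( "dataset/Method_BDT/BDT/MVA_BDT_B" );
 Double_t sep = TMVA::gTools().GetSeparation( hS, hB );   // 1/2 * int (S-B)^2/(S+B) dx
 std::cout << "separation: " << sep << std::endl;
 ~~~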
66 
67  The standard MVA output also prints the linear correlation coefficients of the
68  input variables for signal and background, which can be useful to eliminate
69  variables that exhibit too strong correlations.
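
 The snippet below sketches, for orientation, how a concrete method derived from
 MethodBase is typically booked, trained and evaluated through the Factory (tree,
 variable and option names are illustrative only):
 ~~~ {.cpp}
 TFile* output = TFile::Open( "TMVA.root", "RECREATE" );
 TMVA::Factory    factory( "TMVAClassification", output, "AnalysisType=Classification" );
 TMVA::DataLoader loader ( "dataset" );
 loader.AddVariable( "var1", 'F' );
 loader.AddVariable( "var2", 'F' );
 loader.AddSignalTree    ( sigTree );   // sigTree, bkgTree: user-supplied TTree*
 loader.AddBackgroundTree( bkgTree );
 loader.PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents" );
 factory.BookMethod( &loader, TMVA::Types::kBDT, "BDT", "NTrees=200" );
 factory.TrainAllMethods();    // invokes MethodBase::TrainMethod() for each booked method
 factory.TestAllMethods();
 factory.EvaluateAllMethods(); // produces the benchmark numbers listed above
 output->Close();
 ~~~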
70 */
71 
72 #include "TMVA/MethodBase.h"
73 
74 #include "TMVA/Config.h"
75 #include "TMVA/Configurable.h"
76 #include "TMVA/DataSetInfo.h"
77 #include "TMVA/DataSet.h"
78 #include "TMVA/Factory.h"
79 #include "TMVA/IMethod.h"
80 #include "TMVA/MsgLogger.h"
81 #include "TMVA/PDF.h"
82 #include "TMVA/Ranking.h"
83 #include "TMVA/Factory.h"
84 #include "TMVA/DataLoader.h"
85 #include "TMVA/Tools.h"
86 #include "TMVA/Results.h"
88 #include "TMVA/ResultsRegression.h"
89 #include "TMVA/ResultsMulticlass.h"
90 #include "TMVA/RootFinder.h"
91 #include "TMVA/Timer.h"
92 #include "TMVA/Tools.h"
93 #include "TMVA/TSpline1.h"
94 #include "TMVA/Types.h"
98 #include "TMVA/VariableInfo.h"
101 #include "TMVA/VariableTransform.h"
102 #include "TMVA/Version.h"
103 
104 #include "TROOT.h"
105 #include "TSystem.h"
106 #include "TObjString.h"
107 #include "TQObject.h"
108 #include "TSpline.h"
109 #include "TMatrix.h"
110 #include "TMath.h"
111 #include "TH1F.h"
112 #include "TH2F.h"
113 #include "TFile.h"
114 #include "TKey.h"
115 #include "TGraph.h"
116 #include "Riostream.h"
117 #include "TXMLEngine.h"
118 
119 #include <iomanip>
120 #include <iostream>
121 #include <fstream>
122 #include <sstream>
123 #include <cstdlib>
124 #include <algorithm>
125 #include <limits>
126 
127 
128 ClassImp(TMVA::MethodBase);
129 
130 using std::endl;
131 using std::atof;
132 
133 //const Int_t MethodBase_MaxIterations_ = 200;
135 
136 //const Int_t NBIN_HIST_PLOT = 100;
137 const Int_t NBIN_HIST_HIGH = 10000;
138 
139 #ifdef _WIN32
140 /* Disable warning C4355: 'this' : used in base member initializer list */
141 #pragma warning ( disable : 4355 )
142 #endif
143 
144 
145 #include "TGraph.h"
146 #include "TMultiGraph.h"
147 
148 ////////////////////////////////////////////////////////////////////////////////
149 /// standard constructor
150 
150 
151 TMVA::IPythonInteractive::IPythonInteractive() : fMultiGraph(new TMultiGraph())
152 {
153  fNumGraphs = 0;
154  fIndex = 0;
155 }
156 
157 ////////////////////////////////////////////////////////////////////////////////
158 /// standard destructor
159 TMVA::IPythonInteractive::~IPythonInteractive()
160 {
161  if (fMultiGraph){
162  delete fMultiGraph;
163  fMultiGraph = nullptr;
164  }
165  return;
166 }
167 
168 ////////////////////////////////////////////////////////////////////////////////
169 /// This function takes a vector of titles and creates a TGraph for each of them.
170 /// It also sets up the style for every TGraph. All graphs are added to a single TMultiGraph.
171 ///
172 /// \param[in] graphTitles vector of titles
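///
/// A minimal usage sketch (titles and values are purely illustrative):
/// ~~~ {.cpp}
/// TMVA::IPythonInteractive ipy;
/// std::vector<TString> titles = { "Training Error", "Test Error" };
/// ipy.Init( titles );               // one styled TGraph per title, collected in the TMultiGraph
/// ipy.AddPoint( 1.0, 0.42, 0.47 );  // x, y of the first graph, y of the second graph
/// ipy.AddPoint( 2.0, 0.35, 0.41 );
/// ~~~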
173 
174 void TMVA::IPythonInteractive::Init(std::vector<TString>& graphTitles)
175 {
176  if (fNumGraphs!=0){
177  std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
178  return;
179  }
180  Int_t color = 2;
181  for(auto& title : graphTitles){
182  fGraphs.push_back( new TGraph() );
183  fGraphs.back()->SetTitle(title);
184  fGraphs.back()->SetName(title);
185  fGraphs.back()->SetFillColor(color);
186  fGraphs.back()->SetLineColor(color);
187  fGraphs.back()->SetMarkerColor(color);
188  fMultiGraph->Add(fGraphs.back());
189  color += 2;
190  fNumGraphs += 1;
191  }
192  return;
193 }
194 
195 ////////////////////////////////////////////////////////////////////////////////
196 /// This function sets the point number to 0 for all graphs.
197 
197 
198 void TMVA::IPythonInteractive::ClearGraphs()
199 {
200  for(Int_t i=0; i<fNumGraphs; i++){
201  fGraphs[i]->Set(0);
202  }
203 }
204 
205 ////////////////////////////////////////////////////////////////////////////////
206 /// This function is used only in the two-TGraph case; it adds a new data point to both graphs.
207 ///
208 /// \param[in] x the x coordinate
209 /// \param[in] y1 the y coordinate for the first TGraph
210 /// \param[in] y2 the y coordinate for the second TGraph
211 
212 void TMVA::IPythonInteractive::AddPoint(Double_t x, Double_t y1, Double_t y2)
213 {
214  fGraphs[0]->Set(fIndex+1);
215  fGraphs[1]->Set(fIndex+1);
216  fGraphs[0]->SetPoint(fIndex, x, y1);
217  fGraphs[1]->SetPoint(fIndex, x, y2);
218  fIndex++;
219  return;
220 }
221 
222 ////////////////////////////////////////////////////////////////////////////////
223 /// This function can add data points to as many TGraphs as we have.
224 ///
225 /// \param[in] dat vector of data points: dat[0] contains the x coordinate,
226 /// dat[1] the y coordinate for the first TGraph, dat[2] for the second, ...
227 
228 void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
229 {
230  for(Int_t i=0; i<fNumGraphs;i++){
231  fGraphs[i]->Set(fIndex+1);
232  fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
233  }
234  fIndex++;
235  return;
236 }
237 
238 
239 ////////////////////////////////////////////////////////////////////////////////
240 /// standard constructor
241 
242 TMVA::MethodBase::MethodBase( const TString& jobName,
243  Types::EMVA methodType,
244  const TString& methodTitle,
245  DataSetInfo& dsi,
246  const TString& theOption) :
247  IMethod(),
248  Configurable ( theOption ),
249  fTmpEvent ( 0 ),
250  fRanking ( 0 ),
251  fInputVars ( 0 ),
252  fAnalysisType ( Types::kNoAnalysisType ),
253  fRegressionReturnVal ( 0 ),
254  fMulticlassReturnVal ( 0 ),
255  fDataSetInfo ( dsi ),
256  fSignalReferenceCut ( 0.5 ),
257  fSignalReferenceCutOrientation( 1. ),
258  fVariableTransformType ( Types::kSignal ),
259  fJobName ( jobName ),
260  fMethodName ( methodTitle ),
261  fMethodType ( methodType ),
262  fTestvar ( "" ),
263  fTMVATrainingVersion ( TMVA_VERSION_CODE ),
264  fROOTTrainingVersion ( ROOT_VERSION_CODE ),
265  fConstructedFromWeightFile ( kFALSE ),
266  fBaseDir ( 0 ),
267  fMethodBaseDir ( 0 ),
268  fFile ( 0 ),
269  fSilentFile (kFALSE),
270  fModelPersistence (kTRUE),
271  fWeightFile ( "" ),
272  fEffS ( 0 ),
273  fDefaultPDF ( 0 ),
274  fMVAPdfS ( 0 ),
275  fMVAPdfB ( 0 ),
276  fSplS ( 0 ),
277  fSplB ( 0 ),
278  fSpleffBvsS ( 0 ),
279  fSplTrainS ( 0 ),
280  fSplTrainB ( 0 ),
281  fSplTrainEffBvsS ( 0 ),
282  fVarTransformString ( "None" ),
283  fTransformationPointer ( 0 ),
284  fTransformation ( dsi, methodTitle ),
285  fVerbose ( kFALSE ),
286  fVerbosityLevelString ( "Default" ),
287  fHelp ( kFALSE ),
288  fHasMVAPdfs ( kFALSE ),
289  fIgnoreNegWeightsInTraining( kFALSE ),
290  fSignalClass ( 0 ),
291  fBackgroundClass ( 0 ),
292  fSplRefS ( 0 ),
293  fSplRefB ( 0 ),
294  fSplTrainRefS ( 0 ),
295  fSplTrainRefB ( 0 ),
296  fSetupCompleted (kFALSE)
297 {
298  SetTestvarName();
300 
301 // // default extension for weight files
302 }
303 
304 ////////////////////////////////////////////////////////////////////////////////
305 /// constructor used for Testing + Application of the MVA,
306 /// only (no training), using given WeightFiles
307 
308 TMVA::MethodBase::MethodBase( Types::EMVA methodType,
309  DataSetInfo& dsi,
310  const TString& weightFile ) :
311  IMethod(),
312  Configurable(""),
313  fTmpEvent ( 0 ),
314  fRanking ( 0 ),
315  fInputVars ( 0 ),
316  fAnalysisType ( Types::kNoAnalysisType ),
317  fRegressionReturnVal ( 0 ),
318  fMulticlassReturnVal ( 0 ),
319  fDataSetInfo ( dsi ),
320  fSignalReferenceCut ( 0.5 ),
321  fVariableTransformType ( Types::kSignal ),
322  fJobName ( "" ),
323  fMethodName ( "MethodBase" ),
324  fMethodType ( methodType ),
325  fTestvar ( "" ),
326  fTMVATrainingVersion ( 0 ),
327  fROOTTrainingVersion ( 0 ),
328  fConstructedFromWeightFile ( kTRUE ),
329  fBaseDir ( 0 ),
330  fMethodBaseDir ( 0 ),
331  fFile ( 0 ),
334  fWeightFile ( weightFile ),
335  fEffS ( 0 ),
336  fDefaultPDF ( 0 ),
337  fMVAPdfS ( 0 ),
338  fMVAPdfB ( 0 ),
339  fSplS ( 0 ),
340  fSplB ( 0 ),
341  fSpleffBvsS ( 0 ),
342  fSplTrainS ( 0 ),
343  fSplTrainB ( 0 ),
344  fSplTrainEffBvsS ( 0 ),
345  fVarTransformString ( "None" ),
347  fTransformation ( dsi, "" ),
348  fVerbose ( kFALSE ),
349  fVerbosityLevelString ( "Default" ),
350  fHelp ( kFALSE ),
351  fHasMVAPdfs ( kFALSE ),
353  fSignalClass ( 0 ),
354  fBackgroundClass ( 0 ),
355  fSplRefS ( 0 ),
356  fSplRefB ( 0 ),
357  fSplTrainRefS ( 0 ),
358  fSplTrainRefB ( 0 ),
359  fSetupCompleted (kFALSE)
360 {
362 // // constructor used for Testing + Application of the MVA,
363 // // only (no training), using given WeightFiles
364 }
365 
366 ////////////////////////////////////////////////////////////////////////////////
367 /// destructor
368 
369 TMVA::MethodBase::~MethodBase( void )
370 {
371  // destructor
372  if (!fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
373 
374  // destructor
375  if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
376  if (fRanking != 0) delete fRanking;
377 
378  // PDFs
379  if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
380  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
381  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
382 
383  // Splines
384  if (fSplS) { delete fSplS; fSplS = 0; }
385  if (fSplB) { delete fSplB; fSplB = 0; }
386  if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
387  if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
388  if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
389  if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
390  if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
392 
393  for (Int_t i = 0; i < 2; i++ ) {
394  if (fEventCollections.at(i)) {
395  for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
396  it != fEventCollections.at(i)->end(); ++it) {
397  delete (*it);
398  }
399  delete fEventCollections.at(i);
400  fEventCollections.at(i) = 0;
401  }
402  }
403 
406 }
407 
408 ////////////////////////////////////////////////////////////////////////////////
409 /// setup of methods
410 
411 void TMVA::MethodBase::SetupMethod()
412 {
413  // setup of methods
414 
415  if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
416  InitBase();
417  DeclareBaseOptions();
418  Init();
419  DeclareOptions();
420  fSetupCompleted = kTRUE;
421 }
422 
423 ////////////////////////////////////////////////////////////////////////////////
424 /// process all options
425 /// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
426 /// (sometimes, eg, fitters are used which can only be implemented during training phase)
427 
428 void TMVA::MethodBase::ProcessSetup()
429 {
430  ProcessBaseOptions();
431  ProcessOptions();
432 }
433 
434 ////////////////////////////////////////////////////////////////////////////////
435 /// check may be overridden by derived class
436 /// (sometimes, eg, fitters are used which can only be implemented during training phase)
437 
438 void TMVA::MethodBase::CheckSetup()
439 {
440  CheckForUnusedOptions();
441 }
442 
443 ////////////////////////////////////////////////////////////////////////////////
444 /// default initialization called by all constructors
445 
446 void TMVA::MethodBase::InitBase()
447 {
448  SetConfigDescription( "Configuration options for classifier architecture and tuning" );
449 
453 
454  fSplTrainS = 0;
455  fSplTrainB = 0;
456  fSplTrainEffBvsS = 0;
457  fMeanS = -1;
458  fMeanB = -1;
459  fRmsS = -1;
460  fRmsB = -1;
461  fXmin = DBL_MAX;
462  fXmax = -DBL_MAX;
464  fSplRefS = 0;
465  fSplRefB = 0;
466 
467  fTrainTime = -1.;
468  fTestTime = -1.;
469 
470  fRanking = 0;
471 
472  // temporary until the move to DataSet is complete
473  fInputVars = new std::vector<TString>;
474  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
475  fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
476  }
479 
480  fEventCollections.resize( 2 );
481  fEventCollections.at(0) = 0;
482  fEventCollections.at(1) = 0;
483 
484  // retrieve signal and background class index
485  if (DataInfo().GetClassInfo("Signal") != 0) {
486  fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
487  }
488  if (DataInfo().GetClassInfo("Background") != 0) {
489  fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
490  }
491 
492  SetConfigDescription( "Configuration options for MVA method" );
493  SetConfigName( TString("Method") + GetMethodTypeName() );
494 }
495 
496 ////////////////////////////////////////////////////////////////////////////////
497 /// define the options (their key words) that can be set in the option string
498 /// here the options valid for ALL MVA methods are declared.
499 ///
500 /// known options:
501 ///
502 /// - VariableTransform=None,Decorrelated,PCA to use transformed variables
503 ///   instead of the original ones
504 /// - VariableTransformType=Signal,Background which decorrelation matrix to use
505 ///   in the method. Only the Likelihood
506 ///   method can make proper use of independent
507 ///   transformations of signal and background
508 /// - fNbinsMVAPdf = 50 Number of bins used to create a PDF of the MVA output
509 /// - fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
510 /// - fHasMVAPdfs create PDFs for the MVA outputs
511 /// - V for verbose output (!V for non-verbose)
512 /// - H for the help message
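///
/// For illustration only: these base options are appended to the method-specific
/// options in the option string passed when booking a method, e.g. (assumed,
/// schematic example; "NTrees" belongs to the booked BDT, the rest are base options)
/// ~~~ {.cpp}
/// factory.BookMethod( &loader, TMVA::Types::kBDT, "BDT",
///                     "!V:VarTransform=D,G:CreateMVAPdfs:IgnoreNegWeightsInTraining:NTrees=200" );
/// ~~~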
513 
514 void TMVA::MethodBase::DeclareBaseOptions()
515 {
516  DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
517 
518  DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
519  AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
520  AddPreDefVal( TString("Debug") );
521  AddPreDefVal( TString("Verbose") );
522  AddPreDefVal( TString("Info") );
523  AddPreDefVal( TString("Warning") );
524  AddPreDefVal( TString("Error") );
525  AddPreDefVal( TString("Fatal") );
526 
527 // If True (default): write all training results (weights) as text files only;
528 // if False: also write them in ROOT format (not available for all methods - will abort if not supported)
529  fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
530  fNormalise = kFALSE; // OBSOLETE !!!
531 
532  DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
533 
534  DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
535 
536  DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
537 
538  DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
539  "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
540 }
541 
542 ////////////////////////////////////////////////////////////////////////////////
543 /// the option string is decoded, for available options see "DeclareOptions"
544 
545 void TMVA::MethodBase::ProcessBaseOptions()
546 {
547  if (HasMVAPdfs()) {
548  // setting the default bin num... maybe should be static ? ==> Please no static (JS)
549  // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
550  // reading every PDF's definition and passing the option string to the next one to be read and marked
551  fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
555  fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
559  fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
563 
564  // the final marked option string is written back to the original methodbase
566  }
567 
568  CreateVariableTransforms( fVarTransformString,
569  DataInfo(),
570  GetTransformationHandler(),
571  Log() );
572 
573  if (!HasMVAPdfs()) {
574  if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
575  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
576  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
577  }
578 
579  if (fVerbose) { // overwrites other settings
580  fVerbosityLevelString = TString("Verbose");
581  Log().SetMinType( kVERBOSE );
582  }
583  else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
584  else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
585  else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
586  else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
587  else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
588  else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
589  else if (fVerbosityLevelString != "Default" ) {
590  Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
591  << fVerbosityLevelString << "' unknown." << Endl;
592  }
594 }
595 
596 ////////////////////////////////////////////////////////////////////////////////
597 /// options that are used ONLY for the READER, to ensure backward compatibility;
598 /// they are hence without any effect (the reader only reads the training
599 /// options that HAD been used at the training of the .xml weight file at hand)
600 
601 void TMVA::MethodBase::DeclareCompatibilityOptions()
602 {
603  DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
604  DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
605  DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
606  "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
607  AddPreDefVal( TString("Signal") );
608  AddPreDefVal( TString("Background") );
609  DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
610  // Why on earth ?? was this here? Was the verbosity level option meant to 'disappear? Not a good idea i think..
611  // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
612  // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
613  // AddPreDefVal( TString("Debug") );
614  // AddPreDefVal( TString("Verbose") );
615  // AddPreDefVal( TString("Info") );
616  // AddPreDefVal( TString("Warning") );
617  // AddPreDefVal( TString("Error") );
618  // AddPreDefVal( TString("Fatal") );
619  DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
620  DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
621 }
622 
623 
624 ////////////////////////////////////////////////////////////////////////////////
625 /// call the Optimizer with the set of parameters and ranges that
626 /// are meant to be tuned.
627 
628 std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
629 {
630 // this is just a dummy... needs to be implemented for each method
631 // individually (as long as we don't have it automated via the
632 // configuration string)
633 
634  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
635  << GetName() << Endl;
636  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently one has to hard-code which parameter is tuned in which range"<<Endl;
637 
638  std::map<TString,Double_t> tunedParameters;
639  tunedParameters.size(); // just to get rid of "unused" warning
640  return tunedParameters;
641 
642 }
643 
644 ////////////////////////////////////////////////////////////////////////////////
645 /// set the tuning parameters according to the argument
646 /// This is just a dummy; have a look at MethodBDT to see how one could
647 /// implement the same thing for the other classifiers.
648 
649 void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
650 {
651 }
652 
653 ////////////////////////////////////////////////////////////////////////////////
654 
655 void TMVA::MethodBase::TrainMethod()
656 {
658  Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
659 
660  // train the MVA method
661  if (Help()) PrintHelpMessage();
662 
663  // all histograms should be created in the method's subdirectory
664  if(!IsSilentFile()) BaseDir()->cd();
665 
666 // calculate all the transformations once (e.g. the sequence of Decorr:Gauss:Decorr)
667 // needed for this classifier
668  GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());
669 
670  // call training of derived MVA
671  Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
672  << "Begin training" << Endl;
673  Long64_t nEvents = Data()->GetNEvents();
674  Timer traintimer( nEvents, GetName(), kTRUE );
675  Train();
676  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
677  << "\tEnd of training " << Endl;
678  SetTrainTime(traintimer.ElapsedSeconds());
679  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
680  << "Elapsed time for training with " << nEvents << " events: "
681  << traintimer.GetElapsedTime() << " " << Endl;
682 
683  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
684  << "\tCreate MVA output for ";
685 
686  // create PDFs for the signal and background MVA distributions (if required)
687  if (DoMulticlass()) {
688  Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
690  }
691  else if (!DoRegression()) {
692 
693  Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
695  if (HasMVAPdfs()) {
696  CreateMVAPdfs();
698  }
699 
700  } else {
701 
702  Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
704 
705  if (HasMVAPdfs() ) {
706  Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
707  CreateMVAPdfs();
708  }
709  }
710 
711  // write the current MVA state into stream
712  // produced are one text file and one ROOT file
714 
715  // produce standalone make class (presently only supported for classification)
716  if ((!DoRegression()) && (fModelPersistence)) MakeClass();
717 
718  // write additional monitoring histograms to main target file (not the weight file)
719  // again, make sure the histograms go into the method's subdirectory
720  if(!IsSilentFile())
721  {
722  BaseDir()->cd();
724  }
725 }
726 
727 ////////////////////////////////////////////////////////////////////////////////
728 
729 void TMVA::MethodBase::GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t& stddev, Double_t& stddev90Percent) const
730 {
731  if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
732  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
733  ResultsRegression* regRes = dynamic_cast<ResultsRegression*>( Data()->GetResults(GetMethodName(), type, Types::kRegression) );
734  bool truncate = false;
735  TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
736  stddev = sqrt(h1->GetMean());
737  truncate = true;
738  Double_t yq[1], xq[]={0.9};
739  h1->GetQuantiles(1,yq,xq);
740  TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
741  stddev90Percent = sqrt(h2->GetMean());
742  delete h1;
743  delete h2;
744 }
745 
746 ////////////////////////////////////////////////////////////////////////////////
747 /// prepare tree branch with the method's discriminating variable
748 
749 void TMVA::MethodBase::AddRegressionOutput( Types::ETreeType type )
750 {
751  Data()->SetCurrentType(type);
752 
753  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
754 
755  ResultsRegression* regRes = dynamic_cast<ResultsRegression*>( Data()->GetResults(GetMethodName(), type, Types::kRegression) );
756 
757  Long64_t nEvents = Data()->GetNEvents();
758 
759  // use timer
760  Timer timer( nEvents, GetName(), kTRUE );
761  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
762  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
763 
764  regRes->Resize( nEvents );
765 
766 // Drawing the progress bar for every event was causing a huge slowdown of the evaluation.
767 // So we set some parameters such that the progress bar is drawn only a total of totalProgressDraws times, i.e. roughly every 1 in 100 events.
768 
769  Int_t totalProgressDraws = 100; // total number of times to update the progress bar
770  Int_t drawProgressEvery = 1; // draw every nth event such that we have a total of totalProgressDraws
771  if(nEvents >= totalProgressDraws) drawProgressEvery = nEvents/totalProgressDraws;
772 
773  for (Int_t ievt=0; ievt<nEvents; ievt++) {
774 
775  Data()->SetCurrentEvent(ievt);
776  std::vector< Float_t > vals = GetRegressionValues();
777  regRes->SetValue( vals, ievt );
778 
779  // Only draw the progress bar once in a while, doing this every event causes the evaluation to be ridiculously slow
780  if(ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.DrawProgressBar( ievt );
781  }
782 
783  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
784  << "Elapsed time for evaluation of " << nEvents << " events: "
785  << timer.GetElapsedTime() << " " << Endl;
786 
787  // store time used for testing
788  if (type==Types::kTesting)
789  SetTestTime(timer.ElapsedSeconds());
790 
791  TString histNamePrefix(GetTestvarName());
792  histNamePrefix += (type==Types::kTraining?"train":"test");
793  regRes->CreateDeviationHistograms( histNamePrefix );
794 }
795 
796 ////////////////////////////////////////////////////////////////////////////////
797 /// prepare tree branch with the method's discriminating variable
798 
799 void TMVA::MethodBase::AddMulticlassOutput( Types::ETreeType type )
800 {
801  Data()->SetCurrentType(type);
802 
803  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
804 
805  ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
806  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
807 
808  Long64_t nEvents = Data()->GetNEvents();
809 
810  // use timer
811  Timer timer( nEvents, GetName(), kTRUE );
812 
813  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
814  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
815 
816  resMulticlass->Resize( nEvents );
817  for (Int_t ievt=0; ievt<nEvents; ievt++) {
818  Data()->SetCurrentEvent(ievt);
819  std::vector< Float_t > vals = GetMulticlassValues();
820  resMulticlass->SetValue( vals, ievt );
821  timer.DrawProgressBar( ievt );
822  }
823 
824  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
825  << "Elapsed time for evaluation of " << nEvents << " events: "
826  << timer.GetElapsedTime() << " " << Endl;
827 
828  // store time used for testing
829  if (type==Types::kTesting)
830  SetTestTime(timer.ElapsedSeconds());
831 
832  TString histNamePrefix(GetTestvarName());
833  histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
834 
835  resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
836  resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
837 }
838 
839 ////////////////////////////////////////////////////////////////////////////////
840 
841 void TMVA::MethodBase::NoErrorCalc(Double_t* const err, Double_t* const errUpper) {
842  if (err) *err=-1;
843  if (errUpper) *errUpper=-1;
844 }
845 
846 ////////////////////////////////////////////////////////////////////////////////
847 
848 Double_t TMVA::MethodBase::GetMvaValue( const Event* const ev, Double_t* err, Double_t* errUpper ) {
849  fTmpEvent = ev;
850  Double_t val = GetMvaValue(err, errUpper);
851  fTmpEvent = 0;
852  return val;
853 }
854 
855 ////////////////////////////////////////////////////////////////////////////////
856 /// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
857 /// for a quick determination if an event would be selected as signal or background
858 
859 Bool_t TMVA::MethodBase::IsSignalLike() {
860  return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
861 }
862 ////////////////////////////////////////////////////////////////////////////////
863 /// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
864 /// for a quick determination if an event with this mva output value would be selected as signal or background
865 
866 Bool_t TMVA::MethodBase::IsSignalLike(Double_t mvaVal) {
867  return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
868 }
869 
870 ////////////////////////////////////////////////////////////////////////////////
871 /// prepare tree branch with the method's discriminating variable
872 
873 void TMVA::MethodBase::AddClassifierOutput( Types::ETreeType type )
874 {
875  Data()->SetCurrentType(type);
876 
877  ResultsClassification* clRes =
878  dynamic_cast<ResultsClassification*>( Data()->GetResults(GetMethodName(), type, Types::kClassification) );
879 
880  Long64_t nEvents = Data()->GetNEvents();
881  clRes->Resize( nEvents );
882 
883  // use timer
884  Timer timer( nEvents, GetName(), kTRUE );
885  std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);
886 
887  // store time used for testing
888  if (type==Types::kTesting)
889  SetTestTime(timer.ElapsedSeconds());
890 
891  // load mva values to results object
892  for (Int_t ievt=0; ievt<nEvents; ievt++) {
893  clRes->SetValue( mvaValues[ievt], ievt );
894  }
895 }
896 
897 ////////////////////////////////////////////////////////////////////////////////
898 /// get all the MVA values for the events of the current Data type
899 std::vector<Double_t> TMVA::MethodBase::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
900 {
901 
902  Long64_t nEvents = Data()->GetNEvents();
903  if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
904  if (firstEvt < 0) firstEvt = 0;
905  std::vector<Double_t> values(lastEvt-firstEvt);
906  // log in case of looping on all the events
907  nEvents = values.size();
908 
909  // use timer
910  Timer timer( nEvents, GetName(), kTRUE );
911 
912  if (logProgress)
913  Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
914  << "Evaluation of " << GetMethodName() << " on "
915  << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
916  << " sample (" << nEvents << " events)" << Endl;
917 
918  for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
919  Data()->SetCurrentEvent(ievt);
920  values[ievt - firstEvt] = GetMvaValue(); // the result vector is sized lastEvt-firstEvt and starts at firstEvt
921 
922  // print progress
923  if (logProgress) {
924  Int_t modulo = Int_t(nEvents/100);
925  if (modulo <= 0 ) modulo = 1;
926  if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
927  }
928  }
929  if (logProgress) {
930  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
931  << "Elapsed time for evaluation of " << nEvents << " events: "
932  << timer.GetElapsedTime() << " " << Endl;
933  }
934 
935  return values;
936 }
937 
938 ////////////////////////////////////////////////////////////////////////////////
939 /// prepare tree branch with the method's discriminating variable
940 
941 void TMVA::MethodBase::AddClassifierOutputProb( Types::ETreeType type )
942 {
943  Data()->SetCurrentType(type);
944 
945  ResultsClassification* mvaProb =
947 
948  Long64_t nEvents = Data()->GetNEvents();
949 
950  // use timer
951  Timer timer( nEvents, GetName(), kTRUE );
952 
953  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
954  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
955 
956  mvaProb->Resize( nEvents );
957  for (Int_t ievt=0; ievt<nEvents; ievt++) {
958 
959  Data()->SetCurrentEvent(ievt);
960  Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
961  if (proba < 0) break;
962  mvaProb->SetValue( proba, ievt );
963 
964  // print progress
965  Int_t modulo = Int_t(nEvents/100);
966  if (modulo <= 0 ) modulo = 1;
967  if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
968  }
969 
970  Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
971  << "Elapsed time for evaluation of " << nEvents << " events: "
972  << timer.GetElapsedTime() << " " << Endl;
973 }
974 
975 ////////////////////////////////////////////////////////////////////////////////
976 /// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
977 ///
978 /// - bias = average deviation
979 /// - dev = average absolute deviation
980 /// - rms = rms of deviation
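///
/// With event weights \f$ w_i \f$, regression outputs \f$ r_i \f$ and true targets \f$ t_i \f$,
/// these quantities correspond to (sketching the definitions used in the code below)
/// \f[
/// \mathrm{bias} = \frac{\sum_i w_i (r_i - t_i)}{\sum_i w_i}, \qquad \mathrm{dev} = \frac{\sum_i w_i \left| r_i - t_i \right|}{\sum_i w_i}, \qquad \mathrm{rms} = \sqrt{\frac{\sum_i w_i (r_i - t_i)^2}{\sum_i w_i} - \mathrm{bias}^2}
/// \f]
/// The truncated variants (biasT, devT, rmsT) are computed from the subset of events whose
/// deviation \f$ r_i - t_i \f$ lies within \f$ \mathrm{bias} \pm 2\,\mathrm{rms} \f$.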
981 
982 void TMVA::MethodBase::TestRegression( Double_t& bias, Double_t& biasT,
983  Double_t& dev, Double_t& devT,
984  Double_t& rms, Double_t& rmsT,
985  Double_t& mInf, Double_t& mInfT,
986  Double_t& corr,
987  Types::ETreeType type )
988 {
989  Types::ETreeType savedType = Data()->GetCurrentType();
990  Data()->SetCurrentType(type);
991 
992  bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
993  Double_t sumw = 0;
994  Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
995  const Int_t nevt = GetNEvents();
996  Float_t* rV = new Float_t[nevt];
997  Float_t* tV = new Float_t[nevt];
998  Float_t* wV = new Float_t[nevt];
999  Float_t xmin = 1e30, xmax = -1e30;
1000  Log() << kINFO << "Calculate regression for all events" << Endl;
1001  Timer timer( nevt, GetName(), kTRUE );
1002  for (Long64_t ievt=0; ievt<nevt; ievt++) {
1003 
1004  const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
1005  Float_t t = ev->GetTarget(0);
1006  Float_t w = ev->GetWeight();
1007  Float_t r = GetRegressionValues()[0];
1008  Float_t d = (r-t);
1009 
1010  // find min/max
1011  xmin = TMath::Min(xmin, TMath::Min(t, r));
1012  xmax = TMath::Max(xmax, TMath::Max(t, r));
1013 
1014  // store for truncated RMS computation
1015  rV[ievt] = r;
1016  tV[ievt] = t;
1017  wV[ievt] = w;
1018 
1019  // compute deviation-squared
1020  sumw += w;
1021  bias += w * d;
1022  dev += w * TMath::Abs(d);
1023  rms += w * d * d;
1024 
1025  // compute correlation between target and regression estimate
1026  m1 += t*w; s1 += t*t*w;
1027  m2 += r*w; s2 += r*r*w;
1028  s12 += t*r;
1029  if ((ievt & 0xFF) == 0) timer.DrawProgressBar(ievt);
1030  }
1031  timer.DrawProgressBar(nevt - 1);
1032  Log() << kINFO << "Elapsed time for evaluation of " << nevt << " events: "
1033  << timer.GetElapsedTime() << " " << Endl;
1034 
1035  // standard quantities
1036  bias /= sumw;
1037  dev /= sumw;
1038  rms /= sumw;
1039  rms = TMath::Sqrt(rms - bias*bias);
1040 
1041  // correlation
1042  m1 /= sumw;
1043  m2 /= sumw;
1044  corr = s12/sumw - m1*m2;
1045  corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1046 
1047  // create histogram required for computation of mutual information
1048  TH2F* hist = new TH2F( "hist", "hist", 150, xmin, xmax, 100, xmin, xmax );
1049  TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );
1050 
1051  // compute truncated RMS and fill histogram
1052  Double_t devMax = bias + 2*rms;
1053  Double_t devMin = bias - 2*rms;
1054  sumw = 0;
1055  int ic=0;
1056  for (Long64_t ievt=0; ievt<nevt; ievt++) {
1057  Float_t d = (rV[ievt] - tV[ievt]);
1058  hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
1059  if (d >= devMin && d <= devMax) {
1060  sumw += wV[ievt];
1061  biasT += wV[ievt] * d;
1062  devT += wV[ievt] * TMath::Abs(d);
1063  rmsT += wV[ievt] * d * d;
1064  histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
1065  ic++;
1066  }
1067  }
1068  biasT /= sumw;
1069  devT /= sumw;
1070  rmsT /= sumw;
1071  rmsT = TMath::Sqrt(rmsT - biasT*biasT);
1072  mInf = gTools().GetMutualInformation( *hist );
1073  mInfT = gTools().GetMutualInformation( *histT );
1074 
1075  delete hist;
1076  delete histT;
1077 
1078  delete [] rV;
1079  delete [] tV;
1080  delete [] wV;
1081 
1082  Data()->SetCurrentType(savedType);
1083 }
1084 
1085 
1086 ////////////////////////////////////////////////////////////////////////////////
1087 /// test multiclass classification
1088 
1089 void TMVA::MethodBase::TestMulticlass()
1090 {
1091  ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>( Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass) );
1092  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1093 
1094  // GA evaluation of best cut for sig eff * sig pur. Slow, disabled for now.
1095  // Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test
1096  // data..." << Endl; for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1097  // resMulticlass->GetBestMultiClassCuts(icls);
1098  // }
1099 
1100  // Create histograms for use in TMVA GUI
1101  TString histNamePrefix(GetTestvarName());
1102  TString histNamePrefixTest{histNamePrefix + "_Test"};
1103  TString histNamePrefixTrain{histNamePrefix + "_Train"};
1104 
1105  resMulticlass->CreateMulticlassHistos(histNamePrefixTest, fNbinsMVAoutput, fNbinsH);
1106  resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTest);
1107 
1108  resMulticlass->CreateMulticlassHistos(histNamePrefixTrain, fNbinsMVAoutput, fNbinsH);
1109  resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTrain);
1110 }
1111 
1112 
1113 ////////////////////////////////////////////////////////////////////////////////
1114 /// test classification: fill the classifier-response histograms for the test sample
1115 
1116 void TMVA::MethodBase::TestClassification()
1117 {
1119 
1120  ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
1121  ( Data()->GetResults( GetMethodName(), Types::kTesting, Types::kClassification ) );
1122 
1123  // sanity checks: tree must exist, and theVar must be in tree
1124  if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
1125  Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
1126  << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
1127  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
1128  << " not found in tree" << Endl;
1129  }
1130 
1131  // basic statistics operations are made in base class
1134 
1135  // choose reasonable histogram ranges, by removing outliers
1136  Double_t nrms = 10;
1137  fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
1138  fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );
1139 
1140  // determine cut orientation
1142 
1143  // fill 2 types of histograms for the various analyses
1144  // this one is for actual plotting
1145 
1146  Double_t sxmax = fXmax+0.00001;
1147 
1148  // classifier response distributions for training sample
1149  // MVA plots used for graphics representation (signal)
1150  TString TestvarName;
1151  if(IsSilentFile())
1152  {
1153  TestvarName=Form("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1154  }else
1155  {
1156  TestvarName=GetTestvarName();
1157  }
1158  TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
1159  TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
1160  mvaRes->Store(mva_s, "MVA_S");
1161  mvaRes->Store(mva_b, "MVA_B");
1162  mva_s->Sumw2();
1163  mva_b->Sumw2();
1164 
1165  TH1* proba_s = 0;
1166  TH1* proba_b = 0;
1167  TH1* rarity_s = 0;
1168  TH1* rarity_b = 0;
1169  if (HasMVAPdfs()) {
1170  // P(MVA) plots used for graphics representation
1171  proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1172  proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1173  mvaRes->Store(proba_s, "Prob_S");
1174  mvaRes->Store(proba_b, "Prob_B");
1175  proba_s->Sumw2();
1176  proba_b->Sumw2();
1177 
1178  // R(MVA) plots used for graphics representation
1179  rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1180  rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1181  mvaRes->Store(rarity_s, "Rar_S");
1182  mvaRes->Store(rarity_b, "Rar_B");
1183  rarity_s->Sumw2();
1184  rarity_b->Sumw2();
1185  }
1186 
1187  // MVA plots used for efficiency calculations (large number of bins)
1188  TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
1189  TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
1190  mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
1191  mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
1192  mva_eff_s->Sumw2();
1193  mva_eff_b->Sumw2();
1194 
1195  // fill the histograms
1196 
1197  ResultsClassification* mvaProb = dynamic_cast<ResultsClassification*>
1199 
1200  Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
1201  if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
1202  std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();
1203 
1204 //LM: this is needed to avoid crashes in ROCCurve
1205  if ( mvaRes->GetSize() != GetNEvents() ) {
1206  Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
1207  assert(mvaRes->GetSize() == GetNEvents());
1208  }
1209 
1210  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1211 
1212  const Event* ev = GetEvent(ievt);
1213  Float_t v = (*mvaRes)[ievt][0];
1214  Float_t w = ev->GetWeight();
1215 
1216  if (DataInfo().IsSignal(ev)) {
1217  mvaResTypes->push_back(kTRUE);
1218  mva_s ->Fill( v, w );
1219  if (mvaProb) {
1220  proba_s->Fill( (*mvaProb)[ievt][0], w );
1221  rarity_s->Fill( GetRarity( v ), w );
1222  }
1223 
1224  mva_eff_s ->Fill( v, w );
1225  }
1226  else {
1227  mvaResTypes->push_back(kFALSE);
1228  mva_b ->Fill( v, w );
1229  if (mvaProb) {
1230  proba_b->Fill( (*mvaProb)[ievt][0], w );
1231  rarity_b->Fill( GetRarity( v ), w );
1232  }
1233  mva_eff_b ->Fill( v, w );
1234  }
1235  }
1236 
1237 // comment out these calls (and several others) if you want unnormalized output
1238  gTools().NormHist( mva_s );
1239  gTools().NormHist( mva_b );
1240  gTools().NormHist( proba_s );
1241  gTools().NormHist( proba_b );
1242  gTools().NormHist( rarity_s );
1243  gTools().NormHist( rarity_b );
1244  gTools().NormHist( mva_eff_s );
1245  gTools().NormHist( mva_eff_b );
1246 
1247  // create PDFs from histograms, using default splines, and no additional smoothing
1248  if (fSplS) { delete fSplS; fSplS = 0; }
1249  if (fSplB) { delete fSplB; fSplB = 0; }
1250  fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
1251  fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
1252 }
1253 
1254 ////////////////////////////////////////////////////////////////////////////////
1255 /// general method used in writing the header of the weight files where
1256 /// the used variables, variable transformation type etc. are specified
1257 
1258 void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1259 {
1260  TString prefix = "";
1261  UserGroup_t * userInfo = gSystem->GetUserInfo();
1262 
1263  tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1264  tf << prefix << "Method : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1265  tf.setf(std::ios::left);
1266  tf << prefix << "TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
1267  << GetTrainingTMVAVersionCode() << "]" << std::endl;
1268  tf << prefix << "ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
1269  << GetTrainingROOTVersionCode() << "]" << std::endl;
1270  tf << prefix << "Creator : " << userInfo->fUser << std::endl;
1271  tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1272  tf << prefix << "Host : " << gSystem->GetBuildNode() << std::endl;
1273  tf << prefix << "Dir : " << gSystem->WorkingDirectory() << std::endl;
1274  tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1275 
1276  TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1277 
1278  tf << prefix << "Analysis type : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1279  tf << prefix << std::endl;
1280 
1281  delete userInfo;
1282 
1283  // First write all options
1284  tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1285  WriteOptionsToStream( tf, prefix );
1286  tf << prefix << std::endl;
1287 
1288  // Second write variable info
1289  tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1290  WriteVarsToStream( tf, prefix );
1291  tf << prefix << std::endl;
1292 }
1293 
1294 ////////////////////////////////////////////////////////////////////////////////
1295 /// xml writing
1296 
1297 void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1298 {
1299  void* it = gTools().AddChild(gi,"Info");
1300  gTools().AddAttr(it,"name", name);
1301  gTools().AddAttr(it,"value", value);
1302 }
1303 
1304 ////////////////////////////////////////////////////////////////////////////////
1305 
1306 void TMVA::MethodBase::AddOutput( Types::ETreeType type, Types::EAnalysisType analysisType ) {
1307  if (analysisType == Types::kRegression) {
1308  AddRegressionOutput( type );
1309  } else if (analysisType == Types::kMulticlass) {
1310  AddMulticlassOutput( type );
1311  } else {
1312  AddClassifierOutput( type );
1313  if (HasMVAPdfs())
1314  AddClassifierOutputProb( type );
1315  }
1316 }
1317 
1318 ////////////////////////////////////////////////////////////////////////////////
1319 /// general method used in writing the header of the weight files where
1320 /// the used variables, variable transformation type etc. are specified
1321 
1322 void TMVA::MethodBase::WriteStateToXML( void* parent ) const
1323 {
1324  if (!parent) return;
1325 
1326  UserGroup_t* userInfo = gSystem->GetUserInfo();
1327 
1328  void* gi = gTools().AddChild(parent, "GeneralInfo");
1329  AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
1330  AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
1331  AddInfoItem( gi, "Creator", userInfo->fUser);
1332  TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
1333  AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
1334  AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
1335  AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
1336  AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));
1337 
1338  Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
1339  TString analysisType((aType==Types::kRegression) ? "Regression" :
1340  (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
1341  AddInfoItem( gi, "AnalysisType", analysisType );
1342  delete userInfo;
1343 
1344  // write options
1345  AddOptionsXMLTo( parent );
1346 
1347  // write variable info
1348  AddVarsXMLTo( parent );
1349 
1350  // write spectator info
1351  if (fModelPersistence)
1352  AddSpectatorsXMLTo( parent );
1353 
1354  // write class info if in multiclass mode
1355  AddClassesXMLTo(parent);
1356 
1357  // write target info if in regression mode
1358  if (DoRegression()) AddTargetsXMLTo(parent);
1359 
1360  // write transformations
1361  GetTransformationHandler(false).AddXMLTo( parent );
1362 
1363  // write MVA variable distributions
1364  void* pdfs = gTools().AddChild(parent, "MVAPdfs");
1365  if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1366  if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1367 
1368  // write weights
1369  AddWeightsXMLTo( parent );
1370 }
1371 
1372 ////////////////////////////////////////////////////////////////////////////////
1373 /// read reference MVA distributions (and other information)
1374 /// from a ROOT-type weight file
1375 
1376 void TMVA::MethodBase::ReadStateFromStream( TFile& rf )
1377 {
1378  Bool_t addDirStatus = TH1::AddDirectoryStatus();
1379  TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
1380  fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1381  fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1382 
1383  TH1::AddDirectory( addDirStatus );
1384 
1385  ReadWeightsFromStream( rf );
1386 
1387  SetTestvarName();
1388 }
1389 
1390 ////////////////////////////////////////////////////////////////////////////////
1391 /// write options and weights to file
1392 /// note that one text file for the main configuration information
1393 /// and one ROOT file for ROOT objects are created
1394 
1395 void TMVA::MethodBase::WriteStateToFile() const
1396 {
1397  // ---- create the text file
1398  TString tfname( GetWeightFileName() );
1399 
1400  // writing xml file
1401  TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1402  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1403  << "Creating xml weight file: "
1404  << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1405  void* doc = gTools().xmlengine().NewDoc();
1406  void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1407  gTools().xmlengine().DocSetRootElement(doc,rootnode);
1408  gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1409  WriteStateToXML(rootnode);
1410  gTools().xmlengine().SaveDoc(doc,xmlfname);
1411  gTools().xmlengine().FreeDoc(doc);
1412 }
1413 
1414 ////////////////////////////////////////////////////////////////////////////////
1415 /// Function to read options and weights from file
1416 
1417 void TMVA::MethodBase::ReadStateFromFile()
1418 {
1419  // get the filename
1420 
1421  TString tfname(GetWeightFileName());
1422 
1423  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1424  << "Reading weight file: "
1425  << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1426 
1427  if (tfname.EndsWith(".xml") ) {
1428 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,29,0)
1429  void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1430 #else
1431  void* doc = gTools().xmlengine().ParseFile(tfname);
1432 #endif
1433  if (!doc) {
1434  Log() << kFATAL << "Error parsing XML file " << tfname << Endl;
1435  }
1436  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1437  ReadStateFromXML(rootnode);
1438  gTools().xmlengine().FreeDoc(doc);
1439  }
1440  else {
1441  std::filebuf fb;
1442  fb.open(tfname.Data(),std::ios::in);
1443  if (!fb.is_open()) { // file not found --> Error
1444  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1445  << "Unable to open input weight file: " << tfname << Endl;
1446  }
1447  std::istream fin(&fb);
1448  ReadStateFromStream(fin);
1449  fb.close();
1450  }
1451  if (!fTxtWeightsOnly) {
1452  // ---- read the ROOT file
1453  TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1454  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1455  << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1456  TFile* rfile = TFile::Open( rfname, "READ" );
1457  ReadStateFromStream( *rfile );
1458  rfile->Close();
1459  }
1460 }
1461 ////////////////////////////////////////////////////////////////////////////////
1462 /// for reading from memory
1463 
1464 void TMVA::MethodBase::ReadStateFromXMLString( const char* xmlstr ) {
1465 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,26,00)
1466  void* doc = gTools().xmlengine().ParseString(xmlstr);
1467  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1468  ReadStateFromXML(rootnode);
1469  gTools().xmlengine().FreeDoc(doc);
1470 #else
1471  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Method MethodBase::ReadStateFromXMLString( const char* xmlstr = "
1472  << xmlstr << " ) is not available for ROOT versions prior to 5.26/00." << Endl;
1473 #endif
1474 
1475  return;
1476 }
1477 
1478 ////////////////////////////////////////////////////////////////////////////////
1479 
1480 void TMVA::MethodBase::ReadStateFromXML( void* methodNode )
1481 {
1482 
1483  TString fullMethodName;
1484  gTools().ReadAttr( methodNode, "Method", fullMethodName );
1485 
1486  fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1487 
1488  // update logger
1489  Log().SetSource( GetName() );
1490  Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1491  << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1492 
1493  // after the method name is read, the testvar can be set
1494  SetTestvarName();
1495 
1496  TString nodeName("");
1497  void* ch = gTools().GetChild(methodNode);
1498  while (ch!=0) {
1499  nodeName = TString( gTools().GetName(ch) );
1500 
1501  if (nodeName=="GeneralInfo") {
1502  // read analysis type
1503 
1504  TString name(""),val("");
1505  void* antypeNode = gTools().GetChild(ch);
1506  while (antypeNode) {
1507  gTools().ReadAttr( antypeNode, "name", name );
1508 
1509  if (name == "TrainingTime")
1510  gTools().ReadAttr( antypeNode, "value", fTrainTime );
1511 
1512  if (name == "AnalysisType") {
1513  gTools().ReadAttr( antypeNode, "value", val );
1514  val.ToLower();
1515  if (val == "regression" ) SetAnalysisType( Types::kRegression );
1516  else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1517  else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1518  else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1519  }
1520 
1521  if (name == "TMVA Release" || name == "TMVA") {
1522  TString s;
1523  gTools().ReadAttr( antypeNode, "value", s);
1524  fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1525  Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1526  }
1527 
1528  if (name == "ROOT Release" || name == "ROOT") {
1529  TString s;
1530  gTools().ReadAttr( antypeNode, "value", s);
1531  fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1532  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1533  << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1534  }
1535  antypeNode = gTools().GetNextChild(antypeNode);
1536  }
1537  }
1538  else if (nodeName=="Options") {
1539  ReadOptionsFromXML(ch);
1540  ParseOptions();
1541 
1542  }
1543  else if (nodeName=="Variables") {
1544  ReadVariablesFromXML(ch);
1545  }
1546  else if (nodeName=="Spectators") {
1547  ReadSpectatorsFromXML(ch);
1548  }
1549  else if (nodeName=="Classes") {
1550  if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1551  }
1552  else if (nodeName=="Targets") {
1553  if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1554  }
1555  else if (nodeName=="Transformations") {
1556  GetTransformationHandler().ReadFromXML(ch);
1557  }
1558  else if (nodeName=="MVAPdfs") {
1559  TString pdfname;
1560  if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1561  if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1562  void* pdfnode = gTools().GetChild(ch);
1563  if (pdfnode) {
1564  gTools().ReadAttr(pdfnode, "Name", pdfname);
1565  fMVAPdfS = new PDF(pdfname);
1566  fMVAPdfS->ReadXML(pdfnode);
1567  pdfnode = gTools().GetNextChild(pdfnode);
1568  gTools().ReadAttr(pdfnode, "Name", pdfname);
1569  fMVAPdfB = new PDF(pdfname);
1570  fMVAPdfB->ReadXML(pdfnode);
1571  }
1572  }
1573  else if (nodeName=="Weights") {
1574  ReadWeightsFromXML(ch);
1575  }
1576  else {
1577  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1578  }
1579  ch = gTools().GetNextChild(ch);
1580 
1581  }
1582 
1583  // update transformation handler
1584  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1585 }
1586 
1587 ////////////////////////////////////////////////////////////////////////////////
1588 /// read the header from the weight files of the different MVA methods
1589 
1590 void TMVA::MethodBase::ReadStateFromStream( std::istream& fin )
1591 {
1592  char buf[512];
1593 
1594  // when reading from stream, we assume the files are produced with TMVA<=397
1595  SetAnalysisType(Types::kClassification);
1596 
1597 
1598  // first read the method name
1599  GetLine(fin,buf);
1600  while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1601  TString namestr(buf);
1602 
1603  TString methodType = namestr(0,namestr.Index("::"));
1604  methodType = methodType(methodType.Last(' '),methodType.Length());
1605  methodType = methodType.Strip(TString::kLeading);
1606 
1607  TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1608  methodName = methodName.Strip(TString::kLeading);
1609  if (methodName == "") methodName = methodType;
1610  fMethodName = methodName;
1611 
1612  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1613 
1614  // update logger
1615  Log().SetSource( GetName() );
1616 
1617  // now the question is whether to read the variables first or the options (well, of course the order
1618  // of writing them needs to agree)
1619  //
1620  // the option "Decorrelation" is needed to decide if the variables we
1621  // read are decorrelated or not
1622  //
1623  // the variables are needed by some methods (TMLP) to build the NN,
1624  // which is done in ProcessOptions; so for the time being we first read and parse the options,
1625  // then we read the variables, and finally we process the options
1626 
1627  // now read all options
1628  GetLine(fin,buf);
1629  while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1630  ReadOptionsFromStream(fin);
1631  ParseOptions();
1632 
1633  // Now read variable info
1634  fin.getline(buf,512);
1635  while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1636  ReadVarsFromStream(fin);
1637 
1638  // now we process the options (of the derived class)
1639  ProcessOptions();
1640 
1641  if (IsNormalised()) {
1642  VariableNormalizeTransform* norm = (VariableNormalizeTransform*)
1643  GetTransformationHandler().AddTransformation( new VariableNormalizeTransform(DataInfo()), -1 );
1644  norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1645  }
1646  VariableTransformBase *varTrafo(0), *varTrafo2(0);
1647  if ( fVarTransformString == "None") {
1648  if (fUseDecorr)
1649  varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1650  } else if ( fVarTransformString == "Decorrelate" ) {
1651  varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1652  } else if ( fVarTransformString == "PCA" ) {
1653  varTrafo = GetTransformationHandler().AddTransformation( new VariablePCATransform(DataInfo()), -1 );
1654  } else if ( fVarTransformString == "Uniform" ) {
1655  varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1656  } else if ( fVarTransformString == "Gauss" ) {
1657  varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1658  } else if ( fVarTransformString == "GaussDecorr" ) {
1659  varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1660  varTrafo2 = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1661  } else {
1662  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1663  << fVarTransformString << "' unknown." << Endl;
1664  }
1665  // Now read decorrelation matrix if available
1666  if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1667  fin.getline(buf,512);
1668  while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1669  if (varTrafo) {
1671  varTrafo->ReadTransformationFromStream(fin, trafo );
1672  }
1673  if (varTrafo2) {
1675  varTrafo2->ReadTransformationFromStream(fin, trafo );
1676  }
1677  }
1678 
1679 
1680  if (HasMVAPdfs()) {
1681  // Now read the MVA PDFs
1682  fin.getline(buf,512);
1683  while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1684  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1685  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1686  fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1687  fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1690 
1691  fin >> *fMVAPdfS;
1692  fin >> *fMVAPdfB;
1693  }
1694 
1695  // Now read weights
1696  fin.getline(buf,512);
1697  while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1698  fin.getline(buf,512);
1699  ReadWeightsFromStream( fin );
1700 
1701  // update transformation handler
1702  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1703 
1704 }
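
ReadStateFromStream() relies on the plain-text weight file being organised in sections introduced by markers such as "Method", "#OPT", "#VAR", "#MAT", "#MVAPDFS" and "#WGT". A minimal sketch of the skip-to-marker idiom used above, written with std::string instead of the fixed char buffer:

    #include <istream>
    #include <string>

    // returns true once a line starting with 'marker' has been consumed
    bool SkipToSection(std::istream& fin, const std::string& marker)
    {
       std::string line;
       while (std::getline(fin, line)) {
          if (line.compare(0, marker.size(), marker) == 0) return true; // marker found
       }
       return false; // end of stream reached without finding the marker
    }
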
1705 
1706 ////////////////////////////////////////////////////////////////////////////////
1707 /// write the list of variables (name, min, max) for a given data
1708 /// transformation method to the stream
1709 
1710 void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1711 {
1712  o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1713  std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1714  for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1715  o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1716  varIt = DataInfo().GetSpectatorInfos().begin();
1717  for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1718 }
1719 
1720 ////////////////////////////////////////////////////////////////////////////////
1721 /// Read the variables (name, min, max) for a given data
1722 /// transformation method from the stream. In the stream we only
1723 /// expect the limits which will be set
1724 
1725 void TMVA::MethodBase::ReadVarsFromStream( std::istream& istr )
1726 {
1727  TString dummy;
1728  UInt_t readNVar;
1729  istr >> dummy >> readNVar;
1730 
1731  if (readNVar!=DataInfo().GetNVariables()) {
1732  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1733  << " while there are " << readNVar << " variables declared in the file"
1734  << Endl;
1735  }
1736 
1737  // we want to make sure all variables are read in the order they are defined
1738  VariableInfo varInfo;
1739  std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1740  int varIdx = 0;
1741  for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
1742  varInfo.ReadFromStream(istr);
1743  if (varIt->GetExpression() == varInfo.GetExpression()) {
1744  varInfo.SetExternalLink((*varIt).GetExternalLink());
1745  (*varIt) = varInfo;
1746  }
1747  else {
1748  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1749  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1750  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for" << Endl;
1751  Log() << kINFO << "the correct working of the method):" << Endl;
1752  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1753  Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1754  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1755  }
1756  }
1757 }
1758 
1759 ////////////////////////////////////////////////////////////////////////////////
1760 /// write variable info to XML
1761 
1762 void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1763 {
1764  void* vars = gTools().AddChild(parent, "Variables");
1765  gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1766 
1767  for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1768  VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1769  void* var = gTools().AddChild( vars, "Variable" );
1770  gTools().AddAttr( var, "VarIndex", idx );
1771  vi.AddToXML( var );
1772  }
1773 }
1774 
1775 ////////////////////////////////////////////////////////////////////////////////
1776 /// write spectator info to XML
1777 
1778 void TMVA::MethodBase::AddSpectatorsXMLTo( void* parent ) const
1779 {
1780  void* specs = gTools().AddChild(parent, "Spectators");
1781 
1782  UInt_t writeIdx=0;
1783  for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1784 
1785  VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1786 
1787  // we do not want to write spectators that are category-cuts,
1788  // except if the method is the category method and the spectators belong to it
1789  if (vi.GetVarType()=='C') continue;
1790 
1791  void* spec = gTools().AddChild( specs, "Spectator" );
1792  gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1793  vi.AddToXML( spec );
1794  }
1795  gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1796 }
1797 
1798 ////////////////////////////////////////////////////////////////////////////////
1799 /// write class info to XML
1800 
1801 void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1802 {
1803  UInt_t nClasses=DataInfo().GetNClasses();
1804 
1805  void* classes = gTools().AddChild(parent, "Classes");
1806  gTools().AddAttr( classes, "NClass", nClasses );
1807 
1808  for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1809  ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1810  TString className =classInfo->GetName();
1811  UInt_t classNumber=classInfo->GetNumber();
1812 
1813  void* classNode=gTools().AddChild(classes, "Class");
1814  gTools().AddAttr( classNode, "Name", className );
1815  gTools().AddAttr( classNode, "Index", classNumber );
1816  }
1817 }
1818 ////////////////////////////////////////////////////////////////////////////////
1819 /// write target info to XML
1820 
1821 void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1822 {
1823  void* targets = gTools().AddChild(parent, "Targets");
1824  gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1825 
1826  for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1827  VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1828  void* tar = gTools().AddChild( targets, "Target" );
1829  gTools().AddAttr( tar, "TargetIndex", idx );
1830  vi.AddToXML( tar );
1831  }
1832 }
1833 
1834 ////////////////////////////////////////////////////////////////////////////////
1835 /// read variable info from XML
1836 
1837 void TMVA::MethodBase::ReadVariablesFromXML( void* varnode )
1838 {
1839  UInt_t readNVar;
1840  gTools().ReadAttr( varnode, "NVar", readNVar);
1841 
1842  if (readNVar!=DataInfo().GetNVariables()) {
1843  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1844  << " while there are " << readNVar << " variables declared in the file"
1845  << Endl;
1846  }
1847 
1848  // we want to make sure all variables are read in the order they are defined
1849  VariableInfo readVarInfo, existingVarInfo;
1850  int varIdx = 0;
1851  void* ch = gTools().GetChild(varnode);
1852  while (ch) {
1853  gTools().ReadAttr( ch, "VarIndex", varIdx);
1854  existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1855  readVarInfo.ReadFromXML(ch);
1856 
1857  if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1858  readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
1859  existingVarInfo = readVarInfo;
1860  }
1861  else {
1862  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1863  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1864  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1865  Log() << kINFO << "correct working of the method):" << Endl;
1866  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1867  Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1868  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1869  }
1870  ch = gTools().GetNextChild(ch);
1871  }
1872 }
1873 
1874 ////////////////////////////////////////////////////////////////////////////////
1875 /// read spectator info from XML
1876 
1877 void TMVA::MethodBase::ReadSpectatorsFromXML( void* specnode )
1878 {
1879  UInt_t readNSpec;
1880  gTools().ReadAttr( specnode, "NSpec", readNSpec);
1881 
1882  if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1883  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1884  << " while there are " << readNSpec << " spectators declared in the file"
1885  << Endl;
1886  }
1887 
1888  // we want to make sure all variables are read in the order they are defined
1889  VariableInfo readSpecInfo, existingSpecInfo;
1890  int specIdx = 0;
1891  void* ch = gTools().GetChild(specnode);
1892  while (ch) {
1893  gTools().ReadAttr( ch, "SpecIndex", specIdx);
1894  existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1895  readSpecInfo.ReadFromXML(ch);
1896 
1897  if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1898  readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
1899  existingSpecInfo = readSpecInfo;
1900  }
1901  else {
1902  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1903  Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1904  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1905  Log() << kINFO << "correct working of the method):" << Endl;
1906  Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1907  Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1908  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1909  }
1910  ch = gTools().GetNextChild(ch);
1911  }
1912 }
1913 
1914 ////////////////////////////////////////////////////////////////////////////////
1915 /// read number of classes from XML
1916 
1917 void TMVA::MethodBase::ReadClassesFromXML( void* clsnode )
1918 {
1919  UInt_t readNCls;
1920  // coverity[tainted_data_argument]
1921  gTools().ReadAttr( clsnode, "NClass", readNCls);
1922 
1923  TString className="";
1924  UInt_t classIndex=0;
1925  void* ch = gTools().GetChild(clsnode);
1926  if (!ch) {
1927  for (UInt_t icls = 0; icls<readNCls;++icls) {
1928  TString classname = Form("class%i",icls);
1929  DataInfo().AddClass(classname);
1930 
1931  }
1932  }
1933  else{
1934  while (ch) {
1935  gTools().ReadAttr( ch, "Index", classIndex);
1936  gTools().ReadAttr( ch, "Name", className );
1937  DataInfo().AddClass(className);
1938 
1939  ch = gTools().GetNextChild(ch);
1940  }
1941  }
1942 
1943  // retrieve signal and background class index
1944  if (DataInfo().GetClassInfo("Signal") != 0) {
1945  fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1946  }
1947  else
1948  fSignalClass=0;
1949  if (DataInfo().GetClassInfo("Background") != 0) {
1950  fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
1951  }
1952  else
1953  fBackgroundClass=1;
1954 }
1955 
1956 ////////////////////////////////////////////////////////////////////////////////
1957 /// read target info from XML
1958 
1959 void TMVA::MethodBase::ReadTargetsFromXML( void* tarnode )
1960 {
1961  UInt_t readNTar;
1962  gTools().ReadAttr( tarnode, "NTrgt", readNTar);
1963 
1964  int tarIdx = 0;
1965  TString expression;
1966  void* ch = gTools().GetChild(tarnode);
1967  while (ch) {
1968  gTools().ReadAttr( ch, "TargetIndex", tarIdx);
1969  gTools().ReadAttr( ch, "Expression", expression);
1970  DataInfo().AddTarget(expression,"","",0,0);
1971 
1972  ch = gTools().GetNextChild(ch);
1973  }
1974 }
1975 
1976 ////////////////////////////////////////////////////////////////////////////////
1977 /// returns the ROOT directory where info/histograms etc of the
1978 /// corresponding MVA method instance are stored
1979 
1980 TDirectory* TMVA::MethodBase::BaseDir() const
1981 {
1982  if (fBaseDir != 0) return fBaseDir;
1983  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
1984 
1985  TDirectory* methodDir = MethodBaseDir();
1986  if (methodDir==0)
1987  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() returned a NULL pointer!" << Endl;
1988 
1989  TString defaultDir = GetMethodName();
1990  TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
1991  if(!sdir)
1992  {
1993  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet --> creating it" <<Endl;
1994  sdir = methodDir->mkdir(defaultDir);
1995  sdir->cd();
1996  // write weight file name into target file
1997  if (fModelPersistence) {
1998  TObjString wfilePath( gSystem->WorkingDirectory() );
1999  TObjString wfileName( GetWeightFileName() );
2000  wfilePath.Write( "TrainingPath" );
2001  wfileName.Write( "WeightFileName" );
2002  }
2003  }
2004 
2005  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
2006  return sdir;
2007 }
2008 
2009 ////////////////////////////////////////////////////////////////////////////////
2010 /// returns the ROOT directory where all instances of the
2011 /// corresponding MVA method are stored
2012 
2013 TDirectory* TMVA::MethodBase::MethodBaseDir() const
2014  {
2015  if (fMethodBaseDir != 0) return fMethodBaseDir;
2016 
2017  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " not set yet --> check if already there.." <<Endl;
2018 
2019 
2020  TDirectory *fFactoryBaseDir=GetFile();
2021 
2022  fMethodBaseDir = fFactoryBaseDir->GetDirectory(DataInfo().GetName());
2023  if(!fMethodBaseDir) //creating dataset directory
2024  {
2025  fMethodBaseDir = fFactoryBaseDir->mkdir(DataInfo().GetName(),Form("Base directory for dataset %s",DataInfo().GetName()));
2026  if(!fMethodBaseDir)Log()<<kFATAL<<"Can not create dir "<<DataInfo().GetName();
2027  }
2028  TString _methodDir = Form("Method_%s",GetMethodName().Data());
2030 
2031  if(!fMethodBaseDir){
2032  fMethodBaseDir = fFactoryBaseDir->GetDirectory(DataInfo().GetName())->mkdir(_methodDir.Data(),Form("Directory for all %s methods", GetMethodTypeName().Data()));
2033  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " does not exist yet --> creating it" <<Endl;
2034  }
2035 
2036  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<"Return from MethodBaseDir() after creating base directory "<<Endl;
2037  return fMethodBaseDir;
2038 }
2039 
2040 ////////////////////////////////////////////////////////////////////////////////
2041 /// set directory of weight file
2042 
2043 void TMVA::MethodBase::SetWeightFileDir( TString fileDir )
2044 {
2045  fFileDir = fileDir;
2047 }
2048 
2049 ////////////////////////////////////////////////////////////////////////////////
2050 /// set the weight file name (deprecated)
2051 
2052 void TMVA::MethodBase::SetWeightFileName( TString theWeightFile)
2053 {
2054  fWeightFile = theWeightFile;
2055 }
2056 
2057 ////////////////////////////////////////////////////////////////////////////////
2058 /// retrieve weight file name
2059 
2060 TString TMVA::MethodBase::GetWeightFileName() const
2061 {
2062  if (fWeightFile!="") return fWeightFile;
2063 
2064  // the default consists of
2065  // directory/jobname_methodname_suffix.extension.{root/txt}
2066  TString suffix = "";
2067  TString wFileDir(GetWeightFileDir());
2068  TString wFileName = GetJobName() + "_" + GetMethodName() +
2069  suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml";
2070  if (wFileDir.IsNull() ) return wFileName;
2071  // add weight file directory if it is not null
2072  return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2073  + wFileName );
2074 }
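
Assuming the default "weights" extension from gConfig().GetIONames(), the name built above follows the pattern <dir>/<job>_<method>.weights.xml. A small standalone sketch (the helper name is illustrative):

    #include <string>

    std::string DefaultWeightFileName(std::string dir, const std::string& job, const std::string& method)
    {
       std::string name = job + "_" + method + ".weights.xml";
       if (dir.empty()) return name;          // no directory set: return the bare file name
       if (dir.back() != '/') dir += '/';     // append the separator only if it is missing
       return dir + name;
    }

    // e.g. DefaultWeightFileName("weights", "TMVAClassification", "BDT")
    //      -> "weights/TMVAClassification_BDT.weights.xml"
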
2075 ////////////////////////////////////////////////////////////////////////////////
2076 /// writes all MVA evaluation histograms to file
2077 
2078 void TMVA::MethodBase::WriteEvaluationHistosToFile(Types::ETreeType treetype)
2079 {
2080  BaseDir()->cd();
2081 
2082 
2083  // write MVA PDFs to file - if exist
2084  if (0 != fMVAPdfS) {
2087  fMVAPdfS->GetPDFHist()->Write();
2088  }
2089  if (0 != fMVAPdfB) {
2092  fMVAPdfB->GetPDFHist()->Write();
2093  }
2094 
2095  // write result-histograms
2096  Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2097  if (!results)
2098  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2099  << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2100  << "/kMaxAnalysisType" << Endl;
2101  results->GetStorage()->Write();
2102  if (treetype==Types::kTesting) {
2104  }
2105 }
2106 
2107 ////////////////////////////////////////////////////////////////////////////////
2108 /// write special monitoring histograms to file
2109 /// dummy implementation here -----------------
2110 
2111 void TMVA::MethodBase::WriteMonitoringHistosToFile( void ) const
2112 {
2113 }
2114 
2115 ////////////////////////////////////////////////////////////////////////////////
2116 /// reads one line from the input stream
2117 /// checks for certain keywords and interprets
2118 /// the line if keywords are found
2119 
2120 Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2121 {
2122  fin.getline(buf,512);
2123  TString line(buf);
2124  if (line.BeginsWith("TMVA Release")) {
2125  Ssiz_t start = line.First('[')+1;
2126  Ssiz_t length = line.Index("]",start)-start;
2127  TString code = line(start,length);
2128  std::stringstream s(code.Data());
2129  s >> fTMVATrainingVersion;
2130  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2131  }
2132  if (line.BeginsWith("ROOT Release")) {
2133  Ssiz_t start = line.First('[')+1;
2134  Ssiz_t length = line.Index("]",start)-start;
2135  TString code = line(start,length);
2136  std::stringstream s(code.Data());
2137  s >> fROOTTrainingVersion;
2138  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2139  }
2140  if (line.BeginsWith("Analysis type")) {
2141  Ssiz_t start = line.First('[')+1;
2142  Ssiz_t length = line.Index("]",start)-start;
2143  TString code = line(start,length);
2144  std::stringstream s(code.Data());
2145  std::string analysisType;
2146  s >> analysisType;
2147  if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2148  else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2149  else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2150  else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2151 
2152  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2153  << (GetAnalysisType() == Types::kRegression ? "Regression" :
2154  (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2155  }
2156 
2157  return true;
2158 }
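
A standalone sketch of the bracket parsing done above (the sample line and helper name are made up; the bracketed integer is the packed version code read from the weight-file header):

    #include <string>
    #include <sstream>

    // e.g. VersionCodeFromLine("TMVA Release : 4.2.1 [262657]") would return 262657
    unsigned int VersionCodeFromLine(const std::string& line)
    {
       std::size_t open  = line.find('[');
       std::size_t close = line.find(']', open);
       if (open == std::string::npos || close == std::string::npos) return 0;
       std::istringstream s(line.substr(open + 1, close - open - 1));
       unsigned int code = 0;
       s >> code;               // read the packed version integer
       return code;
    }
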
2159 
2160 ////////////////////////////////////////////////////////////////////////////////
2161 /// Create PDFs of the MVA output variables
2162 
2164 {
2166 
2167  // the PDFs are stored as results ONLY if the corresponding "results" are booked,
2168  // otherwise they will only be used 'online'
2169  ResultsClassification * mvaRes = dynamic_cast<ResultsClassification*>
2170  ( Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMaxAnalysisType) );
2171 
2172  if (mvaRes==0 || mvaRes->GetSize()==0) {
2173  Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2174  }
2175 
2176  Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2177  Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2178 
2179  // create histograms that serve as basis to create the MVA Pdfs
2180  TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2181  fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2182  TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2183  fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2184 
2185 
2186  // compute sum of weights properly
2187  histMVAPdfS->Sumw2();
2188  histMVAPdfB->Sumw2();
2189 
2190  // fill histograms
2191  for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2192  Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2193  Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2194 
2195  if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2196  else histMVAPdfB->Fill( theVal, theWeight );
2197  }
2198 
2199  gTools().NormHist( histMVAPdfS );
2200  gTools().NormHist( histMVAPdfB );
2201 
2202  // momentary hack for ROOT problem
2203  if(!IsSilentFile())
2204  {
2205  histMVAPdfS->Write();
2206  histMVAPdfB->Write();
2207  }
2208  // create PDFs
2209  fMVAPdfS->BuildPDF ( histMVAPdfS );
2210  fMVAPdfB->BuildPDF ( histMVAPdfB );
2211  fMVAPdfS->ValidatePDF( histMVAPdfS );
2212  fMVAPdfB->ValidatePDF( histMVAPdfB );
2213 
2214  if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2215  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2216  << Form( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2217  GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2218  << Endl;
2219  }
2220 
2221  delete histMVAPdfS;
2222  delete histMVAPdfB;
2223 }
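
For users, these PDFs are normally requested through the booking option CreateMVAPdfs (the same option the GetRarity() warning further below points to). A hedged booking sketch; the method type and the other option strings are chosen purely for illustration:

    #include "TMVA/Factory.h"
    #include "TMVA/DataLoader.h"
    #include "TMVA/Types.h"

    void BookWithPdfs(TMVA::Factory* factory, TMVA::DataLoader* loader)
    {
       // "CreateMVAPdfs" asks the method to build PDFs of its output,
       // which GetProba() and GetRarity() rely on
       factory->BookMethod(loader, TMVA::Types::kBDT, "BDT",
                           "NTrees=200:CreateMVAPdfs");
    }
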
2224 
2225 Double_t TMVA::MethodBase::GetProba(const Event *ev){
2226  // the simple one: automatically calculates the mvaVal and uses the
2227  // SAME sig/bkg ratio as given in the training sample (typically 50/50,
2228  // i.e. NormMode=EqualNumEvents, but it can be different)
2229  if (!fMVAPdfS || !fMVAPdfB) {
2230  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
2231  CreateMVAPdfs();
2232  }
2234  Double_t mvaVal = GetMvaValue(ev);
2235 
2236  return GetProba(mvaVal,sigFraction);
2237 
2238 }
2239 ////////////////////////////////////////////////////////////////////////////////
2240 /// compute likelihood ratio
2241 
2242 Double_t TMVA::MethodBase::GetProba( Double_t mvaVal, Double_t ap_sig )
2243 {
2244  if (!fMVAPdfS || !fMVAPdfB) {
2245  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2246  return -1.0;
2247  }
2248  Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2249  Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2250 
2251  Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2252 
2253  return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2254 }
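
The computation above is Bayes' rule with ap_sig as the prior signal fraction. A standalone restatement (the function name is illustrative; pS and pB would come from the signal and background PDFs):

    double SignalProbability(double pS, double pB, double sigFraction)
    {
       const double denom = pS * sigFraction + pB * (1.0 - sigFraction);
       return (denom > 0) ? (pS * sigFraction) / denom : -1.0;  // -1 signals an undefined result
    }

    // e.g. SignalProbability(0.8, 0.1, 0.5) ~ 0.889
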
2255 
2256 ////////////////////////////////////////////////////////////////////////////////
2257 /// compute rarity:
2258 /// \f[
2259 /// R(x) = \int_{-\infty}^{x} PDF(x') \, dx'
2260 /// \f]
2261 /// where PDF(x) is the PDF of the classifier's signal or background distribution
2262 
2263 Double_t TMVA::MethodBase::GetRarity( Double_t mvaVal, Types::ESBType reftype ) const
2264 {
2265  if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2266  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2267  << "select option \"CreateMVAPdfs\"" << Endl;
2268  return 0.0;
2269  }
2270 
2271  PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2272 
2273  return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2274 }
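
Equivalently, the rarity is the cumulative distribution of the chosen reference PDF evaluated at the MVA value. A numerical sketch on a binned, unit-normalised density (names and binning are illustrative only):

    #include <vector>

    double Rarity(const std::vector<double>& pdf, double xmin, double xmax, double x)
    {
       const double step = (xmax - xmin) / pdf.size();
       double integral = 0.0;
       for (std::size_t i = 0; i < pdf.size(); ++i) {
          const double xhi = xmin + (i + 1) * step;
          if (xhi > x) break;          // stop once the bin upper edge passes x
          integral += pdf[i] * step;   // accumulate PDF(x') dx'
       }
       return integral;                // in [0,1] if pdf is normalised to unit area
    }
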
2275 
2276 ////////////////////////////////////////////////////////////////////////////////
2277 /// fill background efficiency (resp. rejection) versus signal efficiency plots
2278 /// returns signal efficiency at background efficiency indicated in theString
2279 
2280 Double_t TMVA::MethodBase::GetEfficiency( const TString& theString, Types::ETreeType type, Double_t& effSerr )
2281 {
2282  Data()->SetCurrentType(type);
2283  Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
2284  std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2285 
2286  // parse input string for required background efficiency
2287  TList* list = gTools().ParseFormatLine( theString );
2288 
2289  // sanity check
2290  Bool_t computeArea = kFALSE;
2291  if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2292  else if (list->GetSize() > 2) {
2293  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2294  << " in string: " << theString
2295  << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2296  delete list;
2297  return -1;
2298  }
2299 
2300  // sanity check
2301  if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2302  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2303  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2304  delete list;
2305  return -1.0;
2306  }
2307 
2308  // create histograms
2309 
2310  // first, get efficiency histograms for signal and background
2311  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2312  Double_t xmin = effhist->GetXaxis()->GetXmin();
2313  Double_t xmax = effhist->GetXaxis()->GetXmax();
2314 
2315  TTHREAD_TLS(Double_t) nevtS;
2316 
2317  // first round ? --> create histograms
2318  if (results->DoesExist("MVA_EFF_S")==0) {
2319 
2320  // for efficiency plot
2321  TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2322  TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2323  results->Store(eff_s, "MVA_EFF_S");
2324  results->Store(eff_b, "MVA_EFF_B");
2325 
2326  // sign of cut
2327  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2328 
2329  // this method is unbinned
2330  nevtS = 0;
2331  for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2332 
2333  // read the tree
2334  Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2335  Float_t theWeight = GetEvent(ievt)->GetWeight();
2336  Float_t theVal = (*mvaRes)[ievt];
2337 
2338  // select histogram depending on whether the event is signal or background
2339  TH1* theHist = isSignal ? eff_s : eff_b;
2340 
2341  // count signal and background events in tree
2342  if (isSignal) nevtS+=theWeight;
2343 
2344  TAxis* axis = theHist->GetXaxis();
2345  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2346  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2347  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2348  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2349  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2350 
2351  if (sign > 0)
2352  for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2353  else if (sign < 0)
2354  for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2355  else
2356  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2357  }
2358 
2359  // renormalise maximum to <=1
2360  // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2361  // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2362 
2365 
2366  // background efficiency versus signal efficiency
2367  TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2368  results->Store(eff_BvsS, "MVA_EFF_BvsS");
2369  eff_BvsS->SetXTitle( "Signal eff" );
2370  eff_BvsS->SetYTitle( "Backgr eff" );
2371 
2372  // background rejection (=1-eff.) versus signal efficiency
2373  TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2374  results->Store(rej_BvsS);
2375  rej_BvsS->SetXTitle( "Signal eff" );
2376  rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2377 
2378  // inverse background eff (1/eff.) versus signal efficiency
2379  TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2380  GetTestvarName(), fNbins, 0, 1 );
2381  results->Store(inveff_BvsS);
2382  inveff_BvsS->SetXTitle( "Signal eff" );
2383  inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2384 
2385  // use root finder
2386  // spline background efficiency plot
2387  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2388  if (Use_Splines_for_Eff_) {
2389  fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2390  fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2391 
2392  // verify spline sanity
2393  gTools().CheckSplines( eff_s, fSplRefS );
2394  gTools().CheckSplines( eff_b, fSplRefB );
2395  }
2396 
2397  // make the background-vs-signal efficiency plot
2398 
2399  // create root finder
2400  RootFinder rootFinder( this, fXmin, fXmax );
2401 
2402  Double_t effB = 0;
2403  fEffS = eff_s; // to be set for the root finder
2404  for (Int_t bini=1; bini<=fNbins; bini++) {
2405 
2406  // find cut value corresponding to a given signal efficiency
2407  Double_t effS = eff_BvsS->GetBinCenter( bini );
2408  Double_t cut = rootFinder.Root( effS );
2409 
2410  // retrieve background efficiency for given cut
2411  if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2412  else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2413 
2414  // and fill histograms
2415  eff_BvsS->SetBinContent( bini, effB );
2416  rej_BvsS->SetBinContent( bini, 1.0-effB );
2418  inveff_BvsS->SetBinContent( bini, 1.0/effB );
2419  }
2420 
2421  // create splines for histogram
2422  fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2423 
2424  // search for overlap point where, when cutting on it,
2425  // one would obtain: eff_S = rej_B = 1 - eff_B
2426  Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2427  Int_t nbins_ = 5000;
2428  for (Int_t bini=1; bini<=nbins_; bini++) {
2429 
2430  // get corresponding signal and background efficiencies
2431  effS = (bini - 0.5)/Float_t(nbins_);
2432  rejB = 1.0 - fSpleffBvsS->Eval( effS );
2433 
2434  // find signal efficiency that corresponds to required background efficiency
2435  if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2436  effS_ = effS;
2437  rejB_ = rejB;
2438  }
2439 
2440  // find cut that corresponds to signal efficiency and update signal-like criterion
2441  Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2442  SetSignalReferenceCut( cut );
2443  fEffS = 0;
2444  }
2445 
2446  // must exist...
2447  if (0 == fSpleffBvsS) {
2448  delete list;
2449  return 0.0;
2450  }
2451 
2452  // now find signal efficiency that corresponds to required background efficiency
2453  Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2454  Int_t nbins_ = 1000;
2455 
2456  if (computeArea) {
2457 
2458  // compute area of rej-vs-eff plot
2459  Double_t integral = 0;
2460  for (Int_t bini=1; bini<=nbins_; bini++) {
2461 
2462  // get corresponding signal and background efficiencies
2463  effS = (bini - 0.5)/Float_t(nbins_);
2464  effB = fSpleffBvsS->Eval( effS );
2465  integral += (1.0 - effB);
2466  }
2467  integral /= nbins_;
2468 
2469  delete list;
2470  return integral;
2471  }
2472  else {
2473 
2474  // that will be the value of the efficiency returned (it does not affect
2475  // the efficiency-vs-bkg plot which is done anyway)
2476  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2477 
2478  // find precise efficiency value
2479  for (Int_t bini=1; bini<=nbins_; bini++) {
2480 
2481  // get corresponding signal and background efficiencies
2482  effS = (bini - 0.5)/Float_t(nbins_);
2483  effB = fSpleffBvsS->Eval( effS );
2484 
2485  // find signal efficiency that corresponds to required background efficiency
2486  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2487  effS_ = effS;
2488  effB_ = effB;
2489  }
2490 
2491  // take mean between bin above and bin below
2492  effS = 0.5*(effS + effS_);
2493 
2494  effSerr = 0;
2495  if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2496 
2497  delete list;
2498  return effS;
2499  }
2500 
2501  return -1;
2502 }
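
When no target efficiency is given, the area returned above is the average background rejection over a uniform grid of signal efficiencies. A compact sketch of that average, with the spline lookup abstracted into a std::function (names are illustrative):

    #include <functional>

    double RejectionArea(const std::function<double(double)>& BkgEffAt, int nbins = 1000)
    {
       double integral = 0.0;
       for (int bin = 1; bin <= nbins; ++bin) {
          const double effS = (bin - 0.5) / double(nbins);  // bin centre in [0,1]
          integral += 1.0 - BkgEffAt(effS);                 // background rejection at this signal efficiency
       }
       return integral / nbins;
    }
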
2503 
2504 ////////////////////////////////////////////////////////////////////////////////
2505 
2506 Double_t TMVA::MethodBase::GetTrainingEfficiency(const TString& theString)
2507 {
2509 
2511 
2512  // fill background efficiency (resp. rejection) versus signal efficiency plots
2513  // returns signal efficiency at background efficiency indicated in theString
2514 
2515  // parse input string for required background efficiency
2516  TList* list = gTools().ParseFormatLine( theString );
2517  // sanity check
2518 
2519  if (list->GetSize() != 2) {
2520  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2521  << " in string: " << theString
2522  << " | required format, e.g., Efficiency:0.05" << Endl;
2523  delete list;
2524  return -1;
2525  }
2526  // that will be the value of the efficiency returned (it does not affect
2527  // the efficiency-vs-bkg plot which is done anyway)
2528  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2529 
2530  delete list;
2531 
2532  // sanity check
2533  if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2534  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2535  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2536  << Endl;
2537  return -1.0;
2538  }
2539 
2540  // create histogram
2541 
2542  // first, get efficiency histograms for signal and background
2543  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2544  Double_t xmin = effhist->GetXaxis()->GetXmin();
2545  Double_t xmax = effhist->GetXaxis()->GetXmax();
2546 
2547  // first round ? --> create and fill histograms
2548  if (results->DoesExist("MVA_TRAIN_S")==0) {
2549 
2550  // classifier response distributions for test sample
2551  Double_t sxmax = fXmax+0.00001;
2552 
2553  // MVA plots on the training sample (check for overtraining)
2554  TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2555  TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2556  results->Store(mva_s_tr, "MVA_TRAIN_S");
2557  results->Store(mva_b_tr, "MVA_TRAIN_B");
2558  mva_s_tr->Sumw2();
2559  mva_b_tr->Sumw2();
2560 
2561  // Training efficiency plots
2562  TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2563  fNbinsH, xmin, xmax );
2564  TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2565  fNbinsH, xmin, xmax );
2566  results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2567  results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2568 
2569  // sign if cut
2570  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2571 
2572  std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2573  assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2574 
2575  // this method is unbinned
2576  for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2577 
2578  Data()->SetCurrentEvent(ievt);
2579  const Event* ev = GetEvent();
2580 
2581  Double_t theVal = mvaValues[ievt];
2582  Double_t theWeight = ev->GetWeight();
2583 
2584  TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2585  TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2586 
2587  theClsHist->Fill( theVal, theWeight );
2588 
2589  TAxis* axis = theEffHist->GetXaxis();
2590  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2591  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2592  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2593  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2594  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2595 
2596  if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2597  else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2598  }
2599 
2600  // normalise output distributions
2601  // uncomment those (and several others) if you want unnormalized output
2602  gTools().NormHist( mva_s_tr );
2603  gTools().NormHist( mva_b_tr );
2604 
2605  // renormalise to maximum
2606  mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2607  mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2608 
2609  // Training background efficiency versus signal efficiency
2610  TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2611  // Training background rejection (=1-eff.) versus signal efficiency
2612  TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2613  results->Store(eff_bvss, "EFF_BVSS_TR");
2614  results->Store(rej_bvss, "REJ_BVSS_TR");
2615 
2616  // use root finder
2617  // spline background efficiency plot
2618  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2619  if (Use_Splines_for_Eff_) {
2620  if (fSplTrainRefS) delete fSplTrainRefS;
2621  if (fSplTrainRefB) delete fSplTrainRefB;
2622  fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2623  fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2624 
2625  // verify spline sanity
2626  gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2627  gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2628  }
2629 
2630  // make the background-vs-signal efficiency plot
2631 
2632  // create root finder
2633  RootFinder rootFinder(this, fXmin, fXmax );
2634 
2635  Double_t effB = 0;
2636  fEffS = results->GetHist("MVA_TRAINEFF_S");
2637  for (Int_t bini=1; bini<=fNbins; bini++) {
2638 
2639  // find cut value corresponding to a given signal efficiency
2640  Double_t effS = eff_bvss->GetBinCenter( bini );
2641 
2642  Double_t cut = rootFinder.Root( effS );
2643 
2644  // retrieve background efficiency for given cut
2645  if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2646  else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2647 
2648  // and fill histograms
2649  eff_bvss->SetBinContent( bini, effB );
2650  rej_bvss->SetBinContent( bini, 1.0-effB );
2651  }
2652  fEffS = 0;
2653 
2654  // create splines for histogram
2655  fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2656  }
2657 
2658  // must exist...
2659  if (0 == fSplTrainEffBvsS) return 0.0;
2660 
2661  // now find signal efficiency that corresponds to required background efficiency
2662  Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2663  Int_t nbins_ = 1000;
2664  for (Int_t bini=1; bini<=nbins_; bini++) {
2665 
2666  // get corresponding signal and background efficiencies
2667  effS = (bini - 0.5)/Float_t(nbins_);
2668  effB = fSplTrainEffBvsS->Eval( effS );
2669 
2670  // find signal efficiency that corresponds to required background efficiency
2671  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2672  effS_ = effS;
2673  effB_ = effB;
2674  }
2675 
2676  return 0.5*(effS + effS_); // the mean between bin above and bin below
2677 }
2678 
2679 ////////////////////////////////////////////////////////////////////////////////
2680 
2681 std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2682 {
2683  ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>
2684  (Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
2685  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2686 
2687  purity.push_back(resMulticlass->GetAchievablePur());
2688  return resMulticlass->GetAchievableEff();
2689 }
2690 
2691 ////////////////////////////////////////////////////////////////////////////////
2692 
2693 std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2694 {
2695  ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>
2696  (Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMulticlass));
2697  if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2698 
2699  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2700  for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2701  resMulticlass->GetBestMultiClassCuts(icls);
2702  }
2703 
2704  purity.push_back(resMulticlass->GetAchievablePur());
2705  return resMulticlass->GetAchievableEff();
2706 }
2707 
2708 ////////////////////////////////////////////////////////////////////////////////
2709 /// Construct a confusion matrix for a multiclass classifier. The confusion
2710 /// matrix compares, in turn, each class against all other classes in a pair-wise
2711 /// fashion. In rows with index \f$ k_r = 0 ... K \f$, \f$ k_r \f$ is
2712 /// considered signal for the sake of comparison and for each column
2713 /// \f$ k_c = 0 ... K \f$ the corresponding class is considered background.
2714 ///
2715 /// Note that the diagonal elements will be returned as NaN since this will
2716 /// compare a class against itself.
2717 ///
2718 /// \see TMVA::ResultsMulticlass::GetConfusionMatrix
2719 ///
2720 /// \param[in] effB The background efficiency for which to evaluate.
2721 /// \param[in] type The data set on which to evaluate (training, testing ...).
2722 ///
2723 /// \return A matrix containing signal efficiencies for the given background
2724 /// efficiency. The diagonal elements are NaN since this measure is
2725 /// meaningless (comparing a class against itself).
2726 ///
2727 
2728 TMatrixD TMVA::MethodBase::GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
2729 {
2730  if (GetAnalysisType() != Types::kMulticlass) {
2731  Log() << kFATAL << "Cannot get confusion matrix for non-multiclass analysis." << std::endl;
2732  return TMatrixD(0, 0);
2733  }
2734 
2735  Data()->SetCurrentType(type);
2736  ResultsMulticlass *resMulticlass =
2737  dynamic_cast<ResultsMulticlass *>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
2738 
2739  if (resMulticlass == nullptr) {
2740  Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2741  << "unable to create pointer in GetMulticlassConfusionMatrix, exiting." << Endl;
2742  return TMatrixD(0, 0);
2743  }
2744 
2745  return resMulticlass->GetConfusionMatrix(effB);
2746 }
2747 
2748 ////////////////////////////////////////////////////////////////////////////////
2749 /// compute significance of mean difference
2750 /// \f[
2751 /// \mathrm{significance} = \frac{|\langle S \rangle - \langle B \rangle|}{\sqrt{RMS_S^2 + RMS_B^2}}
2752 /// \f]
2753 
2754 Double_t TMVA::MethodBase::GetSignificance( void ) const
2755 {
2756  Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2757 
2758  return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2759 }
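
A standalone restatement of this estimator (inputs would be the classifier means and RMS values for signal and background; the helper name is illustrative):

    #include <cmath>

    double Significance(double meanS, double meanB, double rmsS, double rmsB)
    {
       const double rms = std::sqrt(rmsS * rmsS + rmsB * rmsB);     // quadratic sum of the RMS values
       return (rms > 0) ? std::fabs(meanS - meanB) / rms : 0.0;
    }
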
2760 
2761 ////////////////////////////////////////////////////////////////////////////////
2762 /// compute "separation" defined as
2763 /// \f[
2764 /// \langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)} \, dx
2765 /// \f]
2766 
2767 Double_t TMVA::MethodBase::GetSeparation( TH1* histoS, TH1* histoB ) const
2768 {
2769  return gTools().GetSeparation( histoS, histoB );
2770 }
2771 
2772 ////////////////////////////////////////////////////////////////////////////////
2773 /// compute "separation" defined as
2774 /// \f[
2775 /// \langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)} \, dx
2776 /// \f]
2777 
2778 Double_t TMVA::MethodBase::GetSeparation( PDF* pdfS, PDF* pdfB ) const
2779 {
2780  // note, if zero pointers given, use internal pdf
2781  // sanity check first
2782  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2783  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2784  if (!pdfS) pdfS = fSplS;
2785  if (!pdfB) pdfB = fSplB;
2786 
2787  if (!fSplS || !fSplB) {
2788  Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2789  << " fSplS or fSplB are not yet filled" << Endl;
2790  return 0;
2791  }else{
2792  return gTools().GetSeparation( *pdfS, *pdfB );
2793  }
2794 }
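
A numerical sketch of the same integral on binned, unit-normalised densities sampled on a common grid (all names are illustrative):

    #include <vector>

    double Separation(const std::vector<double>& S, const std::vector<double>& B, double step)
    {
       double sep = 0.0;
       for (std::size_t i = 0; i < S.size() && i < B.size(); ++i) {
          const double sum = S[i] + B[i];
          if (sum > 0) sep += 0.5 * (S[i] - B[i]) * (S[i] - B[i]) / sum * step;
       }
       return sep;   // 0 for identical shapes, 1 for fully disjoint shapes
    }
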
2795 
2796 ////////////////////////////////////////////////////////////////////////////////
2797 /// calculate the area (integral) under the ROC curve as an
2798 /// overall quality measure of the classification
2799 
2800 Double_t TMVA::MethodBase::GetROCIntegral(TH1D *histS, TH1D *histB) const
2801 {
2802  // note, if zero pointers given, use internal pdf
2803  // sanity check first
2804  if ((!histS && histB) || (histS && !histB))
2805  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2806 
2807  if (histS==0 || histB==0) return 0.;
2808 
2809  TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2810  TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2811 
2812 
2813  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2814  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2815 
2816  Double_t integral = 0;
2817  UInt_t nsteps = 1000;
2818  Double_t step = (xmax-xmin)/Double_t(nsteps);
2819  Double_t cut = xmin;
2820  for (UInt_t i=0; i<nsteps; i++) {
2821  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2822  cut+=step;
2823  }
2824  delete pdfS;
2825  delete pdfB;
2826  return integral*step;
2827 }
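
The same quadrature in standalone form: approximate the integral of (1 - CDF_B(x)) * S(x) over the MVA range using binned, unit-normalised densities (names and binning are illustrative):

    #include <vector>
    #include <algorithm>

    double RocIntegral(const std::vector<double>& pdfS, const std::vector<double>& pdfB, double step)
    {
       const std::size_t n = std::min(pdfS.size(), pdfB.size());
       std::vector<double> tailB(n + 1, 0.0);              // tailB[i] = integral of B from bin i to the upper edge
       for (std::size_t i = n; i-- > 0; ) tailB[i] = tailB[i + 1] + pdfB[i] * step;
       double integral = 0.0;
       for (std::size_t i = 0; i < n; ++i)
          integral += (1.0 - tailB[i]) * pdfS[i] * step;   // (1 - eff_B(cut)) weighted by S(cut)
       return integral;
    }
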
2828 
2829 
2830 ////////////////////////////////////////////////////////////////////////////////
2831 /// calculate the area (integral) under the ROC curve as an
2832 /// overall quality measure of the classification
2833 
2834 Double_t TMVA::MethodBase::GetROCIntegral(PDF *pdfS, PDF *pdfB) const
2835 {
2836  // note, if zero pointers given, use internal pdf
2837  // sanity check first
2838  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2839  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(PDF*, PDF*)> Mismatch in pdfs" << Endl;
2840  if (!pdfS) pdfS = fSplS;
2841  if (!pdfB) pdfB = fSplB;
2842 
2843  if (pdfS==0 || pdfB==0) return 0.;
2844 
2845  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2846  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2847 
2848  Double_t integral = 0;
2849  UInt_t nsteps = 1000;
2850  Double_t step = (xmax-xmin)/Double_t(nsteps);
2851  Double_t cut = xmin;
2852  for (UInt_t i=0; i<nsteps; i++) {
2853  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2854  cut+=step;
2855  }
2856  return integral*step;
2857 }
2858 
2859 ////////////////////////////////////////////////////////////////////////////////
2860 /// plot the significance, \f$ \frac{S}{\sqrt{S+B}} \f$, versus the cut for the given numbers
2861 /// of signal and background events; returns the cut yielding maximum significance;
2862 /// the maximum significance itself is also returned via reference
2863 
2864 Double_t TMVA::MethodBase::GetMaximumSignificance( Double_t SignalEvents,
2865  Double_t BackgroundEvents,
2866  Double_t& max_significance_value ) const
2867 {
2869 
2870  Double_t max_significance(0);
2871  Double_t effS(0),effB(0),significance(0);
2872  TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2873 
2874  if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2875  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2876  << "Number of signal or background events is <= 0 ==> abort"
2877  << Endl;
2878  }
2879 
2880  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2881  << SignalEvents/BackgroundEvents << Endl;
2882 
2883  TH1* eff_s = results->GetHist("MVA_EFF_S");
2884  TH1* eff_b = results->GetHist("MVA_EFF_B");
2885 
2886  if ( (eff_s==0) || (eff_b==0) ) {
2887  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2888  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2889  return 0;
2890  }
2891 
2892  for (Int_t bin=1; bin<=fNbinsH; bin++) {
2893  effS = eff_s->GetBinContent( bin );
2894  effB = eff_b->GetBinContent( bin );
2895 
2896  // put significance into a histogram
2897  significance = sqrt(SignalEvents)*( effS )/sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
2898 
2899  temp_histogram->SetBinContent(bin,significance);
2900  }
2901 
2902  // find maximum in histogram
2903  max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
2904  max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
2905 
2906  // delete
2907  delete temp_histogram;
2908 
2909  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
2910  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
2911 
2912  return max_significance;
2913 }
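
The scan above is equivalent to maximising S*effS / sqrt(S*effS + B*effB) over the cut bins. A compact standalone sketch (names are illustrative; effS/effB are the efficiency curves sampled per cut bin):

    #include <vector>
    #include <cmath>

    std::size_t BestCutBin(double S, double B,
                           const std::vector<double>& effS, const std::vector<double>& effB)
    {
       std::size_t best = 0;
       double bestSig = -1.0;
       for (std::size_t i = 0; i < effS.size() && i < effB.size(); ++i) {
          const double denom = S * effS[i] + B * effB[i];
          const double sig = (denom > 0) ? S * effS[i] / std::sqrt(denom) : 0.0;
          if (sig > bestSig) { bestSig = sig; best = i; }
       }
       return best;   // index of the cut bin with maximum significance
    }
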
2914 
2915 ////////////////////////////////////////////////////////////////////////////////
2916 /// calculates rms, mean, xmin, xmax of the event variable;
2917 /// this can either be done for the variables as they are or for
2918 /// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
2919 
2920 void TMVA::MethodBase::Statistics( Types::ETreeType treeType, const TString& theVarName,
2921  Double_t& meanS, Double_t& meanB,
2922  Double_t& rmsS, Double_t& rmsB,
2923  Double_t& xmin, Double_t& xmax )
2924 {
2925  Types::ETreeType previousTreeType = Data()->GetCurrentType();
2926  Data()->SetCurrentType(treeType);
2927 
2928  Long64_t entries = Data()->GetNEvents();
2929 
2930  // sanity check
2931  if (entries <=0)
2932  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;
2933 
2934  // index of the wanted variable
2935  UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
2936 
2937  // first fill signal and background in arrays before analysis
2938  xmin = +DBL_MAX;
2939  xmax = -DBL_MAX;
2940  Long64_t nEventsS = -1;
2941  Long64_t nEventsB = -1;
2942 
2943  // take into account event weights
2944  meanS = 0;
2945  meanB = 0;
2946  rmsS = 0;
2947  rmsB = 0;
2948  Double_t sumwS = 0, sumwB = 0;
2949 
2950  // loop over all training events
2951  for (Int_t ievt = 0; ievt < entries; ievt++) {
2952 
2953  const Event* ev = GetEvent(ievt);
2954 
2955  Double_t theVar = ev->GetValue(varIndex);
2956  Double_t weight = ev->GetWeight();
2957 
2958  if (DataInfo().IsSignal(ev)) {
2959  sumwS += weight;
2960  meanS += weight*theVar;
2961  rmsS += weight*theVar*theVar;
2962  }
2963  else {
2964  sumwB += weight;
2965  meanB += weight*theVar;
2966  rmsB += weight*theVar*theVar;
2967  }
2968  xmin = TMath::Min( xmin, theVar );
2969  xmax = TMath::Max( xmax, theVar );
2970  }
2971  ++nEventsS;
2972  ++nEventsB;
2973 
2974  meanS = meanS/sumwS;
2975  meanB = meanB/sumwB;
2976  rmsS = TMath::Sqrt( rmsS/sumwS - meanS*meanS );
2977  rmsB = TMath::Sqrt( rmsB/sumwB - meanB*meanB );
2978 
2979  Data()->SetCurrentType(previousTreeType);
2980 }
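
A standalone restatement of the weighted mean/RMS bookkeeping above (names are illustrative):

    #include <vector>
    #include <cmath>

    void WeightedMeanRms(const std::vector<double>& values, const std::vector<double>& weights,
                         double& mean, double& rms)
    {
       double sumw = 0.0, sumwx = 0.0, sumwx2 = 0.0;
       for (std::size_t i = 0; i < values.size() && i < weights.size(); ++i) {
          sumw   += weights[i];
          sumwx  += weights[i] * values[i];
          sumwx2 += weights[i] * values[i] * values[i];
       }
       mean = (sumw > 0) ? sumwx / sumw : 0.0;
       rms  = (sumw > 0) ? std::sqrt(sumwx2 / sumw - mean * mean) : 0.0;
    }
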
2981 
2982 ////////////////////////////////////////////////////////////////////////////////
2983 /// create reader class for method (classification only at present)
2984 
2985 void TMVA::MethodBase::MakeClass( const TString& theClassFileName ) const
2986 {
2987  // the default name consists of <weight-file-dir>/<job-name>_<method-name>.class.C
2988  TString classFileName = "";
2989  if (theClassFileName == "")
2990  classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
2991  else
2992  classFileName = theClassFileName;
2993 
2994  TString className = TString("Read") + GetMethodName();
2995 
2996  TString tfname( classFileName );
2997  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
2998  << "Creating standalone class: "
2999  << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
3000 
3001  std::ofstream fout( classFileName );
3002  if (!fout.good()) { // file could not be opened --> Error
3003  Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
3004  }
3005 
3006  // now create the class
3007  // preamble
3008  fout << "// Class: " << className << std::endl;
3009  fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
3010 
3011  // print general information and configuration state
3012  fout << std::endl;
3013  fout << "/* configuration options =====================================================" << std::endl << std::endl;
3014  WriteStateToStream( fout );
3015  fout << std::endl;
3016  fout << "============================================================================ */" << std::endl;
3017 
3018  // generate the class
3019  fout << "" << std::endl;
3020  fout << "#include <array>" << std::endl;
3021  fout << "#include <vector>" << std::endl;
3022  fout << "#include <cmath>" << std::endl;
3023  fout << "#include <string>" << std::endl;
3024  fout << "#include <iostream>" << std::endl;
3025  fout << "" << std::endl;
3026  // now if the classifier needs to write some additional classes for its response implementation
3027  // this code goes here: (at least the header declarations need to come before the main class
3028  this->MakeClassSpecificHeader( fout, className );
3029 
3030  fout << "#ifndef IClassifierReader__def" << std::endl;
3031  fout << "#define IClassifierReader__def" << std::endl;
3032  fout << std::endl;
3033  fout << "class IClassifierReader {" << std::endl;
3034  fout << std::endl;
3035  fout << " public:" << std::endl;
3036  fout << std::endl;
3037  fout << " // constructor" << std::endl;
3038  fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
3039  fout << " virtual ~IClassifierReader() {}" << std::endl;
3040  fout << std::endl;
3041  fout << " // return classifier response" << std::endl;
3042  fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3043  fout << std::endl;
3044  fout << " // returns classifier status" << std::endl;
3045  fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3046  fout << std::endl;
3047  fout << " protected:" << std::endl;
3048  fout << std::endl;
3049  fout << " bool fStatusIsClean;" << std::endl;
3050  fout << "};" << std::endl;
3051  fout << std::endl;
3052  fout << "#endif" << std::endl;
3053  fout << std::endl;
3054  fout << "class " << className << " : public IClassifierReader {" << std::endl;
3055  fout << std::endl;
3056  fout << " public:" << std::endl;
3057  fout << std::endl;
3058  fout << " // constructor" << std::endl;
3059  fout << " " << className << "( std::vector<std::string>& theInputVars )" << std::endl;
3060  fout << " : IClassifierReader()," << std::endl;
3061  fout << " fClassName( \"" << className << "\" )," << std::endl;
3062  fout << " fNvars( " << GetNvar() << " )," << std::endl;
3063  fout << " fIsNormalised( " << (IsNormalised() ? "true" : "false") << " )" << std::endl;
3064  fout << " {" << std::endl;
3065  fout << " // the training input variables" << std::endl;
3066  fout << " const char* inputVars[] = { ";
3067  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3068  fout << "\"" << GetOriginalVarName(ivar) << "\"";
3069  if (ivar<GetNvar()-1) fout << ", ";
3070  }
3071  fout << " };" << std::endl;
3072  fout << std::endl;
3073  fout << " // sanity checks" << std::endl;
3074  fout << " if (theInputVars.size() <= 0) {" << std::endl;
3075  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3076  fout << " fStatusIsClean = false;" << std::endl;
3077  fout << " }" << std::endl;
3078  fout << std::endl;
3079  fout << " if (theInputVars.size() != fNvars) {" << std::endl;
3080  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3081  fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3082  fout << " fStatusIsClean = false;" << std::endl;
3083  fout << " }" << std::endl;
3084  fout << std::endl;
3085  fout << " // validate input variables" << std::endl;
3086  fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3087  fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3088  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3089  fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3090  fout << " fStatusIsClean = false;" << std::endl;
3091  fout << " }" << std::endl;
3092  fout << " }" << std::endl;
3093  fout << std::endl;
3094  fout << " // initialize min and max vectors (for normalisation)" << std::endl;
3095  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3096  fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
3097  fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
3098  }
3099  fout << std::endl;
3100  fout << " // initialize input variable types" << std::endl;
3101  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3102  fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3103  }
3104  fout << std::endl;
3105  fout << " // initialize constants" << std::endl;
3106  fout << " Initialize();" << std::endl;
3107  fout << std::endl;
3108  if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3109  fout << " // initialize transformation" << std::endl;
3110  fout << " InitTransform();" << std::endl;
3111  }
3112  fout << " }" << std::endl;
3113  fout << std::endl;
3114  fout << " // destructor" << std::endl;
3115  fout << " virtual ~" << className << "() {" << std::endl;
3116  fout << " Clear(); // method-specific" << std::endl;
3117  fout << " }" << std::endl;
3118  fout << std::endl;
3119  fout << " // the classifier response" << std::endl;
3120  fout << " // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
3121  fout << " // variables given to the constructor" << std::endl;
3122  fout << " double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
3123  fout << std::endl;
3124  fout << " private:" << std::endl;
3125  fout << std::endl;
3126  fout << " // method-specific destructor" << std::endl;
3127  fout << " void Clear();" << std::endl;
3128  fout << std::endl;
3129  if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3130  fout << " // input variable transformation" << std::endl;
3131  GetTransformationHandler().MakeFunction(fout, className,1);
3132  fout << " void InitTransform();" << std::endl;
3133  fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3134  fout << std::endl;
3135  }
3136  fout << " // common member variables" << std::endl;
3137  fout << " const char* fClassName;" << std::endl;
3138  fout << std::endl;
3139  fout << " const size_t fNvars;" << std::endl;
3140  fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
3141  fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3142  fout << std::endl;
3143  fout << " // normalisation of input variables" << std::endl;
3144  fout << " const bool fIsNormalised;" << std::endl;
3145  fout << " bool IsNormalised() const { return fIsNormalised; }" << std::endl;
3146  fout << " double fVmin[" << GetNvar() << "];" << std::endl;
3147  fout << " double fVmax[" << GetNvar() << "];" << std::endl;
3148  fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3149  fout << " // normalise to output range: [-1, 1]" << std::endl;
3150  fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3151  fout << " }" << std::endl;
3152  fout << std::endl;
3153  fout << " // type of input variable: 'F' or 'I'" << std::endl;
3154  fout << " char fType[" << GetNvar() << "];" << std::endl;
3155  fout << std::endl;
3156  fout << " // initialize internal variables" << std::endl;
3157  fout << " void Initialize();" << std::endl;
3158  fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3159  fout << "" << std::endl;
3160  fout << " // private members (method specific)" << std::endl;
3161 
3162  // call the classifier specific output (the classifier must close the class !)
3163  MakeClassSpecific( fout, className );
3164 
3165  fout << " inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3166  fout << " {" << std::endl;
3167  fout << " // classifier response value" << std::endl;
3168  fout << " double retval = 0;" << std::endl;
3169  fout << std::endl;
3170  fout << " // classifier response, sanity check first" << std::endl;
3171  fout << " if (!IsStatusClean()) {" << std::endl;
3172  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3173  fout << " << \" because status is dirty\" << std::endl;" << std::endl;
3174  fout << " retval = 0;" << std::endl;
3175  fout << " }" << std::endl;
3176  fout << " else {" << std::endl;
3177  fout << " if (IsNormalised()) {" << std::endl;
3178  fout << " // normalise variables" << std::endl;
3179  fout << " std::vector<double> iV;" << std::endl;
3180  fout << " iV.reserve(inputValues.size());" << std::endl;
3181  fout << " int ivar = 0;" << std::endl;
3182  fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3183  fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3184  fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3185  fout << " }" << std::endl;
3186  if (GetTransformationHandler().GetTransformationList().GetSize()!=0 &&
3187  GetMethodType() != Types::kLikelihood &&
3188  GetMethodType() != Types::kHMatrix) {
3189  fout << " Transform( iV, -1 );" << std::endl;
3190  }
3191  fout << " retval = GetMvaValue__( iV );" << std::endl;
3192  fout << " }" << std::endl;
3193  fout << " else {" << std::endl;
3194  if (GetTransformationHandler().GetTransformationList().GetSize()!=0 &&
3195  GetMethodType() != Types::kLikelihood &&
3196  GetMethodType() != Types::kHMatrix) {
3197  fout << " std::vector<double> iV;" << std::endl;
3198  fout << " int ivar = 0;" << std::endl;
3199  fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3200  fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3201  fout << " iV.push_back(*varIt);" << std::endl;
3202  fout << " }" << std::endl;
3203  fout << " Transform( iV, -1 );" << std::endl;
3204  fout << " retval = GetMvaValue__( iV );" << std::endl;
3205  }
3206  else {
3207  fout << " retval = GetMvaValue__( inputValues );" << std::endl;
3208  }
3209  fout << " }" << std::endl;
3210  fout << " }" << std::endl;
3211  fout << std::endl;
3212  fout << " return retval;" << std::endl;
3213  fout << " }" << std::endl;
3214 
3215  // create output for transformation - if any
3216  if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3217  GetTransformationHandler().MakeFunction(fout, className,2);
3218 
3219  // close the file
3220  fout.close();
3221 }
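// Illustrative sketch (not part of the original MethodBase.cxx): using the file
// generated above outside of ROOT/TMVA. Following the naming convention in the
// code (file "<jobName>_<methodName>.class.C", class "Read<methodName>"), a
// hypothetical BDT booked in a job called "TMVAClassification" would be read back
// as below; the variable names and values are assumptions of this example.

#include <iostream>
#include <string>
#include <vector>
#include "TMVAClassification_BDT.class.C"   // assumed output of MakeClass()

int main()
{
   // must match the training variables, in the same order
   std::vector<std::string> inputVars = { "var1", "var2", "var3", "var4" };
   ReadBDT reader( inputVars );

   std::vector<double> event = { 0.5, -1.2, 3.4, 0.0 };   // one event's input values
   if (reader.IsStatusClean())
      std::cout << "MVA response: " << reader.GetMvaValue( event ) << std::endl;
   return 0;
}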
3222 
3223 ////////////////////////////////////////////////////////////////////////////////
3224 /// prints out the method-specific help message
3225 
3226 void TMVA::MethodBase::PrintHelpMessage() const
3227 {
3228  // if options are written to reference file, also append help info
3229  std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3230  std::ofstream* o = 0;
3231  if (gConfig().WriteOptionsReference()) {
3232  Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3233  o = new std::ofstream( GetReferenceFile(), std::ios::app );
3234  if (!o->good()) { // file could not be opened --> Error
3235  Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3236  }
3237  std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3238  }
3239 
3240  // "|--------------------------------------------------------------|"
3241  if (!o) {
3242  Log() << kINFO << Endl;
3243  Log() << gTools().Color("bold")
3244  << "================================================================"
3245  << gTools().Color( "reset" )
3246  << Endl;
3247  Log() << gTools().Color("bold")
3248  << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3249  << gTools().Color( "reset" )
3250  << Endl;
3251  }
3252  else {
3253  Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3254  }
3255 
3256  // print method-specific help message
3257  GetHelpMessage();
3258 
3259  if (!o) {
3260  Log() << Endl;
3261  Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3262  Log() << gTools().Color("bold")
3263  << "================================================================"
3264  << gTools().Color( "reset" )
3265  << Endl;
3266  Log() << Endl;
3267  }
3268  else {
3269  // indicate END
3270  Log() << "# End of Message___" << Endl;
3271  }
3272 
3273  std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3274  if (o) o->close();
3275 }
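// Illustrative sketch (not part of the original MethodBase.cxx): the rdbuf()
// redirection used above, in isolation. Output sent to std::cout is written to a
// file and the original stream buffer is restored afterwards.

#include <fstream>
#include <iostream>

void AppendLineToFile( const char* filename )
{
   std::streambuf* coutBuf = std::cout.rdbuf();   // save the original buffer
   std::ofstream out( filename, std::ios::app );
   std::cout.rdbuf( out.rdbuf() );                // redirect std::cout to the file

   std::cout << "this line goes into the file" << std::endl;

   std::cout.rdbuf( coutBuf );                    // restore std::cout
}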
3276 
3277 // ----------------------- r o o t f i n d i n g ----------------------------
3278 
3279 ////////////////////////////////////////////////////////////////////////////////
3280 /// returns efficiency as function of cut
3281 
3282 Double_t TMVA::MethodBase::GetValueForRoot( Double_t theCut )
3283 {
3284  Double_t retval=0;
3285 
3286  // retrieve the class object
3287  if (Use_Splines_for_Eff_) {
3288  retval = fSplRefS->Eval( theCut );
3289  }
3290  else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3291 
3292  // caution: here we take some "forbidden" action to hide a problem:
3293  // in some cases, in particular for likelihood, the binned efficiency distributions
3294 // do not equal 1 at xmin and 0 at xmax; of course, in principle we have the
3295  // unbinned information available in the trees, but the unbinned minimization is
3296  // too slow, and we don't need to do a precision measurement here. Hence, we force
3297  // this property.
3298  Double_t eps = 1.0e-5;
3299  if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3300  else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3301 
3302  return retval;
3303 }
3304 
3305 ////////////////////////////////////////////////////////////////////////////////
3306 /// returns the event collection (i.e. the dataset) TRANSFORMED using the
3307 /// classifier's specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3308 
3309 const std::vector<TMVA::Event*>& TMVA::MethodBase::GetEventCollection( Types::ETreeType type )
3310 {
3311  // if there's no variable transformation for this classifier, just hand back the
3312  // event collection of the data set
3313  if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3314  return (Data()->GetEventCollection(type));
3315  }
3316 
3317  // otherwise, transform ALL the events and hand back the vector of the pointers to the
3318  // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3319  // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3320  Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3321  if (fEventCollections.at(idx) == 0) {
3322  fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3323  fEventCollections.at(idx) = CalcTransformations(*(fEventCollections.at(idx)), kTRUE);
3324  }
3325  return *(fEventCollections.at(idx));
3326 }
3327 
3328 ////////////////////////////////////////////////////////////////////////////////
3329 /// calculates the TMVA version string from the training version code on the fly
3330 
3331 TString TMVA::MethodBase::GetTrainingTMVAVersionString() const
3332 {
3333  UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3334  UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3335  UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3336 
3337  return TString(Form("%i.%i.%i",a,b,c));
3338 }
3339 
3340 ////////////////////////////////////////////////////////////////////////////////
3341 /// calculates the ROOT version string from the training version code on the fly
3342 
3343 TString TMVA::MethodBase::GetTrainingROOTVersionString() const
3344 {
3345  UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3346  UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3347  UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3348 
3349  return TString(Form("%i.%02i/%02i",a,b,c));
3350 }
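// Illustrative sketch (not part of the original MethodBase.cxx): both routines
// above unpack a version code of the form (a<<16) + (b<<8) + c. For example the
// (made-up) code 0x060E05 decodes to a=6, b=14, c=5, i.e. "6.14.5" in TMVA style
// and "6.14/05" in ROOT style.

#include <cstdio>

void PrintVersionStrings( unsigned int code )
{
   unsigned int a = (code & 0xff0000) >> 16;   // major
   unsigned int b = (code & 0x00ff00) >>  8;   // minor
   unsigned int c = (code & 0x0000ff);         // patch
   std::printf( "TMVA style: %u.%u.%u   ROOT style: %u.%02u/%02u\n", a, b, c, a, b, c );
}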
3351 
3352 ////////////////////////////////////////////////////////////////////////////////
3353 
3354 Double_t TMVA::MethodBase::GetKSTrainingVsTest(Char_t SorB, TString opt){
3355  ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
3356  ( Data()->GetResults(GetMethodName(), Types::kTesting, Types::kClassification) );
3357 
3358  if (mvaRes != NULL) {
3359  TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3360  TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3361  TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3362  TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3363 
3364  if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3365 
3366  if (SorB == 's' || SorB == 'S')
3367  return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3368  else
3369  return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3370  }
3371  return -1;
3372 }
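// Illustrative sketch (not part of the original MethodBase.cxx): comparing the
// test and training MVA shapes of a trained classification method with the
// routine above. The helper name and the method pointer are assumptions of this
// example; a return value of -1 signals that the MVA histograms are not available.

#include <iostream>
#include "TMVA/MethodBase.h"

void ReportKSProbabilities( TMVA::MethodBase* method )
{
   Double_t ksSig = method->GetKSTrainingVsTest( 'S' );   // signal: test vs training
   Double_t ksBkg = method->GetKSTrainingVsTest( 'B' );   // background: test vs training
   if (ksSig < 0 || ksBkg < 0)
      std::cout << "MVA histograms not available" << std::endl;
   else
      std::cout << "KS(signal) = " << ksSig << ", KS(background) = " << ksBkg << std::endl;
}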