Logo ROOT   6.07/09
Reference Guide
MethodBase.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21  * *
22  * Copyright (c) 2005-2011: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  * *
32  **********************************************************************************/
33 
34 ////////////////////////////////////////////////////////////////////////////////
35 
36 /* Begin_Html
37  Virtual base Class for all MVA method
38 
39  MethodBase hosts several specific evaluation methods.
40 
41  The kind of MVA that provides optimal performance in an analysis strongly
42  depends on the particular application. The evaluation factory provides a
43  number of numerical benchmark results to directly assess the performance
44  of the MVA training on the independent test sample. These are:
45  <ul>
46  <li> The <i>signal efficiency</i> at three representative background efficiencies
47  (which is 1 &minus; rejection).</li>
48  <li> The <i>significance</I> of an MVA estimator, defined by the difference
49  between the MVA mean values for signal and background, divided by the
50  quadratic sum of their root mean squares.</li>
51  <li> The <i>separation</i> of an MVA <i>x</i>, defined by the integral
52  &frac12;&int;(S(x) &minus; B(x))<sup>2</sup>/(S(x) + B(x))dx, where
53  S(x) and B(x) are the signal and background distributions, respectively.
54  The separation is zero for identical signal and background MVA shapes,
55  and it is one for disjunctive shapes.
56  <li> <a name="mu_transform">
57  The average, &int;x &mu;(S(x))dx, of the signal &mu;-transform.
58  The &mu;-transform of an MVA denotes the transformation that yields
59  a uniform background distribution. In this way, the signal distributions
60  S(x) can be directly compared among the various MVAs. The stronger S(x)
61  peaks towards one, the better is the discrimination of the MVA. The
62  &mu;-transform is
63  <a href=http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html>documented here</a>.
64  </ul>
65  The MVA standard output also prints the linear correlation coefficients between
66  signal and background, which can be useful to eliminate variables that exhibit too
67  strong correlations.
68 
69  End_Html */
70 //_______________________________________________________________________
71 
72 #include "TMVA/MethodBase.h"
73 
74 #include "TMVA/Config.h"
75 #include "TMVA/Configurable.h"
76 #include "TMVA/DataSetInfo.h"
77 #include "TMVA/DataSet.h"
78 #include "TMVA/Factory.h"
79 #include "TMVA/IMethod.h"
80 #include "TMVA/MsgLogger.h"
81 #include "TMVA/PDF.h"
82 #include "TMVA/Ranking.h"
83 #include "TMVA/Factory.h"
84 #include "TMVA/DataLoader.h"
85 #include "TMVA/Tools.h"
86 #include "TMVA/Results.h"
88 #include "TMVA/ResultsRegression.h"
89 #include "TMVA/ResultsMulticlass.h"
90 #include "TMVA/RootFinder.h"
91 #include "TMVA/Timer.h"
92 #include "TMVA/Tools.h"
93 #include "TMVA/TSpline1.h"
94 #include "TMVA/Types.h"
98 #include "TMVA/VariableInfo.h"
101 #include "TMVA/VariableTransform.h"
102 #include "TMVA/Version.h"
103 
104 #include "TROOT.h"
105 #include "TSystem.h"
106 #include "TObjString.h"
107 #include "TQObject.h"
108 #include "TSpline.h"
109 #include "TMatrix.h"
110 #include "TMath.h"
111 #include "TH1F.h"
112 #include "TH2F.h"
113 #include "TFile.h"
114 #include "TKey.h"
115 #include "TGraph.h"
116 #include "Riostream.h"
117 #include "TXMLEngine.h"
118 
119 #include <iomanip>
120 #include <iostream>
121 #include <fstream>
122 #include <sstream>
123 #include <cstdlib>
124 #include <algorithm>
125 #include <limits>
126 
127 
129 
130 using std::endl;
131 using std::atof;
132 
133 //const Int_t MethodBase_MaxIterations_ = 200;
135 
136 //const Int_t NBIN_HIST_PLOT = 100;
137 const Int_t NBIN_HIST_HIGH = 10000;
138 
139 #ifdef _WIN32
140 /* Disable warning C4355: 'this' : used in base member initializer list */
141 #pragma warning ( disable : 4355 )
142 #endif
143 
144 
145 #include "TGraph.h"
146 #include "TMultiGraph.h"
147 
148 ////////////////////////////////////////////////////////////////////////////////
149 /// standard constructor
151 {
152  fNumGraphs = 0;
153  fIndex = 0;
154 }
155 
156 ////////////////////////////////////////////////////////////////////////////////
157 /// standard destructor
159 {
160  if (fMultiGraph){
161  delete fMultiGraph;
162  fMultiGraph = nullptr;
163  }
164  return;
165 }
166 
167 ////////////////////////////////////////////////////////////////////////////////
168 /// This function gets some title and it creates a TGraph for every title.
169 /// It also sets up the style for every TGraph. All graphs are added to a single TMultiGrah.
170 /// \param[in] graphTtitles vector of titles
171 void TMVA::IPythonInteractive::Init(std::vector<TString>& graphTitles)
172 {
173  if (fNumGraphs!=0){
174  std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
175  return;
176  }
177  Int_t color = 2;
178  for(auto& title : graphTitles){
179  fGraphs.push_back( new TGraph() );
180  fGraphs.back()->SetTitle(title);
181  fGraphs.back()->SetName(title);
182  fGraphs.back()->SetFillColor(color);
183  fGraphs.back()->SetLineColor(color);
184  fGraphs.back()->SetMarkerColor(color);
185  fMultiGraph->Add(fGraphs.back());
186  color += 2;
187  fNumGraphs += 1;
188  }
189  return;
190 }
191 
192 ////////////////////////////////////////////////////////////////////////////////
193 /// This function sets the point number to 0 for all graphs.
195 {
196  for(Int_t i=0; i<fNumGraphs; i++){
197  fGraphs[i]->Set(0);
198  }
199 }
200 
201 ////////////////////////////////////////////////////////////////////////////////
202 /// This function is used only in 2 TGraph case, and it will add new data points to graphs.
203 /// \param[in] x the x coordinate
204 /// \param[in] y1 the y coordinate for the first TGraph
205 /// \param[in] y2 the y coordinate for the second TGraph
207 {
208  fGraphs[0]->Set(fIndex+1);
209  fGraphs[1]->Set(fIndex+1);
210  fGraphs[0]->SetPoint(fIndex, x, y1);
211  fGraphs[1]->SetPoint(fIndex, x, y2);
212  fIndex++;
213  return;
214 }
215 
216 ////////////////////////////////////////////////////////////////////////////////
217 /// This function can add data points to as many TGraps as we have.
218 /// \param[in] dat vector of data points. The dat[0] contains the x coordinate,
219 /// dat[1] contains the y coordinate for first TGraph, dat[2] for second, ...
220 void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
221 {
222  for(Int_t i=0; i<fNumGraphs;i++){
223  fGraphs[i]->Set(fIndex+1);
224  fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
225  }
226  fIndex++;
227  return;
228 }
229 
230 
231 ////////////////////////////////////////////////////////////////////////////////
232 /// standard constructor
233 
235  Types::EMVA methodType,
236  const TString& methodTitle,
237  DataSetInfo& dsi,
238  const TString& theOption) :
239  IMethod(),
240  Configurable ( theOption ),
241  fTmpEvent ( 0 ),
242  fRanking ( 0 ),
243  fInputVars ( 0 ),
244  fAnalysisType ( Types::kNoAnalysisType ),
245  fRegressionReturnVal ( 0 ),
246  fMulticlassReturnVal ( 0 ),
247  fDataSetInfo ( dsi ),
248  fSignalReferenceCut ( 0.5 ),
249  fSignalReferenceCutOrientation( 1. ),
250  fVariableTransformType ( Types::kSignal ),
251  fJobName ( jobName ),
252  fMethodName ( methodTitle ),
253  fMethodType ( methodType ),
254  fTestvar ( "" ),
255  fTMVATrainingVersion ( TMVA_VERSION_CODE ),
256  fROOTTrainingVersion ( ROOT_VERSION_CODE ),
257  fConstructedFromWeightFile ( kFALSE ),
258  fBaseDir ( 0 ),
259  fMethodBaseDir ( 0 ),
260  fFile ( 0 ),
261  fSilentFile (kFALSE),
262  fModelPersistence (kTRUE),
263  fWeightFile ( "" ),
264  fEffS ( 0 ),
265  fDefaultPDF ( 0 ),
266  fMVAPdfS ( 0 ),
267  fMVAPdfB ( 0 ),
268  fSplS ( 0 ),
269  fSplB ( 0 ),
270  fSpleffBvsS ( 0 ),
271  fSplTrainS ( 0 ),
272  fSplTrainB ( 0 ),
273  fSplTrainEffBvsS ( 0 ),
274  fVarTransformString ( "None" ),
275  fTransformationPointer ( 0 ),
276  fTransformation ( dsi, methodTitle ),
277  fVerbose ( kFALSE ),
278  fVerbosityLevelString ( "Default" ),
279  fHelp ( kFALSE ),
280  fHasMVAPdfs ( kFALSE ),
281  fIgnoreNegWeightsInTraining( kFALSE ),
282  fSignalClass ( 0 ),
283  fBackgroundClass ( 0 ),
284  fSplRefS ( 0 ),
285  fSplRefB ( 0 ),
286  fSplTrainRefS ( 0 ),
287  fSplTrainRefB ( 0 ),
288  fSetupCompleted (kFALSE)
289 {
290  SetTestvarName();
292 
293 // // default extension for weight files
294 }
295 
296 ////////////////////////////////////////////////////////////////////////////////
297 /// constructor used for Testing + Application of the MVA,
298 /// only (no training), using given WeightFiles
299 
301  DataSetInfo& dsi,
302  const TString& weightFile ) :
303  IMethod(),
304  Configurable(""),
305  fTmpEvent ( 0 ),
306  fRanking ( 0 ),
307  fInputVars ( 0 ),
308  fAnalysisType ( Types::kNoAnalysisType ),
309  fRegressionReturnVal ( 0 ),
310  fMulticlassReturnVal ( 0 ),
311  fDataSetInfo ( dsi ),
312  fSignalReferenceCut ( 0.5 ),
313  fVariableTransformType ( Types::kSignal ),
314  fJobName ( "" ),
315  fMethodName ( "MethodBase" ),
316  fMethodType ( methodType ),
317  fTestvar ( "" ),
318  fTMVATrainingVersion ( 0 ),
319  fROOTTrainingVersion ( 0 ),
320  fConstructedFromWeightFile ( kTRUE ),
321  fBaseDir ( 0 ),
322  fMethodBaseDir ( 0 ),
323  fFile ( 0 ),
325  fModelPersistence (kTRUE),
326  fWeightFile ( weightFile ),
327  fEffS ( 0 ),
328  fDefaultPDF ( 0 ),
329  fMVAPdfS ( 0 ),
330  fMVAPdfB ( 0 ),
331  fSplS ( 0 ),
332  fSplB ( 0 ),
333  fSpleffBvsS ( 0 ),
334  fSplTrainS ( 0 ),
335  fSplTrainB ( 0 ),
336  fSplTrainEffBvsS ( 0 ),
337  fVarTransformString ( "None" ),
339  fTransformation ( dsi, "" ),
340  fVerbose ( kFALSE ),
341  fVerbosityLevelString ( "Default" ),
342  fHelp ( kFALSE ),
343  fHasMVAPdfs ( kFALSE ),
345  fSignalClass ( 0 ),
346  fBackgroundClass ( 0 ),
347  fSplRefS ( 0 ),
348  fSplRefB ( 0 ),
349  fSplTrainRefS ( 0 ),
350  fSplTrainRefB ( 0 ),
352 {
354 // // constructor used for Testing + Application of the MVA,
355 // // only (no training), using given WeightFiles
356 }
357 
358 ////////////////////////////////////////////////////////////////////////////////
359 /// destructor
360 
362 {
363  // destructor
364  if (!fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
365 
366  // destructor
367  if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
368  if (fRanking != 0) delete fRanking;
369 
370  // PDFs
371  if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
372  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
373  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
374 
375  // Splines
376  if (fSplS) { delete fSplS; fSplS = 0; }
377  if (fSplB) { delete fSplB; fSplB = 0; }
378  if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
379  if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
380  if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
381  if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
382  if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
384 
385  for (Int_t i = 0; i < 2; i++ ) {
386  if (fEventCollections.at(i)) {
387  for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
388  it != fEventCollections.at(i)->end(); it++) {
389  delete (*it);
390  }
391  delete fEventCollections.at(i);
392  fEventCollections.at(i) = 0;
393  }
394  }
395 
398 }
399 
400 ////////////////////////////////////////////////////////////////////////////////
401 /// setup of methods
402 
404 {
405  // setup of methods
406 
407  if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
408  InitBase();
410  Init();
411  DeclareOptions();
413 }
414 
415 ////////////////////////////////////////////////////////////////////////////////
416 /// process all options
417 /// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
418 /// (sometimes, eg, fitters are used which can only be implemented during training phase)
419 
421 {
423  ProcessOptions();
424 }
425 
426 ////////////////////////////////////////////////////////////////////////////////
427 /// check may be overridden by derived class
428 /// (sometimes, eg, fitters are used which can only be implemented during training phase)
429 
431 {
433 }
434 
435 ////////////////////////////////////////////////////////////////////////////////
436 /// default initialization called by all constructors
437 
439 {
440  SetConfigDescription( "Configuration options for classifier architecture and tuning" );
441 
445 
446  fSplTrainS = 0;
447  fSplTrainB = 0;
448  fSplTrainEffBvsS = 0;
449  fMeanS = -1;
450  fMeanB = -1;
451  fRmsS = -1;
452  fRmsB = -1;
453  fXmin = DBL_MAX;
454  fXmax = -DBL_MAX;
456  fSplRefS = 0;
457  fSplRefB = 0;
458 
459  fTrainTime = -1.;
460  fTestTime = -1.;
461 
462  fRanking = 0;
463 
464  // temporary until the move to DataSet is complete
465  fInputVars = new std::vector<TString>;
466  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
467  fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
468  }
471 
472  fEventCollections.resize( 2 );
473  fEventCollections.at(0) = 0;
474  fEventCollections.at(1) = 0;
475 
476  // retrieve signal and background class index
477  if (DataInfo().GetClassInfo("Signal") != 0) {
478  fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
479  }
480  if (DataInfo().GetClassInfo("Background") != 0) {
481  fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
482  }
483 
484  SetConfigDescription( "Configuration options for MVA method" );
485  SetConfigName( TString("Method") + GetMethodTypeName() );
486 }
487 
488 ////////////////////////////////////////////////////////////////////////////////
489 /// define the options (their key words) that can be set in the option string
490 /// here the options valid for ALL MVA methods are declared.
491 /// known options: VariableTransform=None,Decorrelated,PCA to use transformed variables
492 /// instead of the original ones
493 /// VariableTransformType=Signal,Background which decorrelation matrix to use
494 /// in the method. Only the Likelihood
495 /// Method can make proper use of independent
496 /// transformations of signal and background
497 /// fNbinsMVAPdf = 50 Number of bins used to create a PDF of MVA
498 /// fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
499 /// fHasMVAPdfs create PDFs for the MVA outputs
500 /// V for Verbose output (!V) for non-verbose
501 /// H for Help message
502 
504 {
505  DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
506 
507  DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
508  AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
509  AddPreDefVal( TString("Debug") );
510  AddPreDefVal( TString("Verbose") );
511  AddPreDefVal( TString("Info") );
512  AddPreDefVal( TString("Warning") );
513  AddPreDefVal( TString("Error") );
514  AddPreDefVal( TString("Fatal") );
515 
516  // If True (default): write all training results (weights) as text files only;
517  // if False: write also in ROOT format (not available for all methods - will abort if not
518  fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
519  fNormalise = kFALSE; // OBSOLETE !!!
520 
521  DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
522 
523  DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
524 
525  DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
526 
527  DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
528  "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
529 }
530 
531 ////////////////////////////////////////////////////////////////////////////////
532 /// the option string is decoded, for available options see "DeclareOptions"
533 
535 {
536  if (HasMVAPdfs()) {
537  // setting the default bin num... maybe should be static ? ==> Please no static (JS)
538  // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
539  // reading every PDF's definition and passing the option string to the next one to be read and marked
540  fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
544  fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
548  fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
552 
553  // the final marked option string is written back to the original methodbase
555  }
556 
558  DataInfo(),
560  Log() );
561 
562  if (!HasMVAPdfs()) {
563  if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
564  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
565  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
566  }
567 
568  if (fVerbose) { // overwrites other settings
569  fVerbosityLevelString = TString("Verbose");
570  Log().SetMinType( kVERBOSE );
571  }
572  else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
573  else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
574  else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
575  else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
576  else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
577  else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
578  else if (fVerbosityLevelString != "Default" ) {
579  Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
580  << fVerbosityLevelString << "' unknown." << Endl;
581  }
583 }
584 
585 ////////////////////////////////////////////////////////////////////////////////
586 /// options that are used ONLY for the READER to ensure backward compatibility;
587 /// they are hence without any effect (the reader is only reading the training
588 /// options that HAD been used at the training of the .xml weightfile at hand)
589 
591 {
592  DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
593  DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
594  DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
595  "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
596  AddPreDefVal( TString("Signal") );
597  AddPreDefVal( TString("Background") );
598  DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
599  // Why on earth ?? was this here? Was the verbosity level option meant to 'disapear? Not a good idea i think..
600  // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
601  // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
602  // AddPreDefVal( TString("Debug") );
603  // AddPreDefVal( TString("Verbose") );
604  // AddPreDefVal( TString("Info") );
605  // AddPreDefVal( TString("Warning") );
606  // AddPreDefVal( TString("Error") );
607  // AddPreDefVal( TString("Fatal") );
608  DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
609  DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
610 }
611 
612 
613 ////////////////////////////////////////////////////////////////////////////////
614 /// call the Optimzier with the set of paremeters and ranges that
615 /// are meant to be tuned.
616 
617 std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
618 {
619  // this is just a dummy... needs to be implemented for each method
620  // individually (as long as we don't have it automatized via the
621  // configuraion string
622 
623  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
624  << GetName() << Endl;
625  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently we need to set hardcoded which parameter is tuned in which ranges"<<Endl;
626 
627  std::map<TString,Double_t> tunedParameters;
628  tunedParameters.size(); // just to get rid of "unused" warning
629  return tunedParameters;
630 
631 }
632 
633 ////////////////////////////////////////////////////////////////////////////////
634 /// set the tuning parameters accoding to the argument
635 /// This is just a dummy .. have a look at the MethodBDT how you could
636 /// perhaps implment the same thing for the other Classifiers..
637 
638 void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
639 {
640 }
641 
642 ////////////////////////////////////////////////////////////////////////////////
643 
645 {
647  Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
648 
649  // train the MVA method
650  if (Help()) PrintHelpMessage();
651 
652  // all histograms should be created in the method's subdirectory
653  if(!IsSilentFile()) BaseDir()->cd();
654 
655  // once calculate all the transformation (e.g. the sequence of Decorr:Gauss:Decorr)
656  // needed for this classifier
658 
659  // call training of derived MVA
660  Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
661  << "Begin training" << Endl;
663  Timer traintimer( nEvents, GetName(), kTRUE );
664  Train();
665  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
666  << "\tEnd of training " << Endl;
667  SetTrainTime(traintimer.ElapsedSeconds());
668  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
669  << "Elapsed time for training with " << nEvents << " events: "
670  << traintimer.GetElapsedTime() << " " << Endl;
671 
672  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
673  << "\tCreate MVA output for ";
674 
675  // create PDFs for the signal and background MVA distributions (if required)
676  if (DoMulticlass()) {
677  Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
679  }
680  else if (!DoRegression()) {
681 
682  Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
684  if (HasMVAPdfs()) {
685  CreateMVAPdfs();
687  }
688 
689  } else {
690 
691  Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
693 
694  if (HasMVAPdfs() ) {
695  Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
696  CreateMVAPdfs();
697  }
698  }
699 
700  // write the current MVA state into stream
701  // produced are one text file and one ROOT file
703 
704  // produce standalone make class (presently only supported for classification)
705  if ((!DoRegression()) && (fModelPersistence)) MakeClass();
706 
707  // write additional monitoring histograms to main target file (not the weight file)
708  // again, make sure the histograms go into the method's subdirectory
709  if(!IsSilentFile())
710  {
711  BaseDir()->cd();
713  }
714 }
715 
716 ////////////////////////////////////////////////////////////////////////////////
717 
719 {
720  if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
721  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
723  bool truncate = false;
724  TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
725  stddev = sqrt(h1->GetMean());
726  truncate = true;
727  Double_t yq[1], xq[]={0.9};
728  h1->GetQuantiles(1,yq,xq);
729  TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
730  stddev90Percent = sqrt(h2->GetMean());
731  delete h1;
732  delete h2;
733 }
734 
735 ////////////////////////////////////////////////////////////////////////////////
736 /// prepare tree branch with the method's discriminating variable
737 
739 {
740  Data()->SetCurrentType(type);
741 
742  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
743 
745 
747 
748  // use timer
749  Timer timer( nEvents, GetName(), kTRUE );
750  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
751  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
752 
753  regRes->Resize( nEvents );
754  for (Int_t ievt=0; ievt<nEvents; ievt++) {
755  Data()->SetCurrentEvent(ievt);
756  std::vector< Float_t > vals = GetRegressionValues();
757  regRes->SetValue( vals, ievt );
758  timer.DrawProgressBar( ievt );
759  }
760 
761  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
762  << "Elapsed time for evaluation of " << nEvents << " events: "
763  << timer.GetElapsedTime() << " " << Endl;
764 
765  // store time used for testing
766  if (type==Types::kTesting)
767  SetTestTime(timer.ElapsedSeconds());
768 
769  TString histNamePrefix(GetTestvarName());
770  histNamePrefix += (type==Types::kTraining?"train":"test");
771  regRes->CreateDeviationHistograms( histNamePrefix );
772 }
773 
774 ////////////////////////////////////////////////////////////////////////////////
775 /// prepare tree branch with the method's discriminating variable
776 
778 {
779  Data()->SetCurrentType(type);
780 
781  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
782 
783  ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
784  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
785 
787 
788  // use timer
789  Timer timer( nEvents, GetName(), kTRUE );
790 
791  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
792  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
793 
794  resMulticlass->Resize( nEvents );
795  for (Int_t ievt=0; ievt<nEvents; ievt++) {
796  Data()->SetCurrentEvent(ievt);
797  std::vector< Float_t > vals = GetMulticlassValues();
798  resMulticlass->SetValue( vals, ievt );
799  timer.DrawProgressBar( ievt );
800  }
801 
802  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
803  << "Elapsed time for evaluation of " << nEvents << " events: "
804  << timer.GetElapsedTime() << " " << Endl;
805 
806  // store time used for testing
807  if (type==Types::kTesting)
808  SetTestTime(timer.ElapsedSeconds());
809 
810  TString histNamePrefix(GetTestvarName());
811  histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
812  resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
813 }
814 
815 
816 
817 ////////////////////////////////////////////////////////////////////////////////
818 
819 void TMVA::MethodBase::NoErrorCalc(Double_t* const err, Double_t* const errUpper) {
820  if (err) *err=-1;
821  if (errUpper) *errUpper=-1;
822 }
823 
824 ////////////////////////////////////////////////////////////////////////////////
825 
826 Double_t TMVA::MethodBase::GetMvaValue( const Event* const ev, Double_t* err, Double_t* errUpper ) {
827  fTmpEvent = ev;
828  Double_t val = GetMvaValue(err, errUpper);
829  fTmpEvent = 0;
830  return val;
831 }
832 
833 ////////////////////////////////////////////////////////////////////////////////
834 /// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
835 /// for a quick determination if an event would be selected as signal or background
836 
839 }
840 ////////////////////////////////////////////////////////////////////////////////
841 /// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
842 /// for a quick determination if an event with this mva output value would be selected as signal or background
843 
846 }
847 
848 ////////////////////////////////////////////////////////////////////////////////
849 /// prepare tree branch with the method's discriminating variable
850 
852 {
853  Data()->SetCurrentType(type);
854 
855  ResultsClassification* clRes =
857 
858 
860  std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);
861 
862 
863  clRes->Resize( nEvents );
864 
865  // use timer
866  Timer timer( nEvents, GetName(), kTRUE );
867 
868  for (Int_t ievt=0; ievt<nEvents; ievt++) {
869  clRes->SetValue( mvaValues[ievt], ievt );
870  }
871 
872  // store time used for testing
873  if (type==Types::kTesting)
874  SetTestTime(timer.ElapsedSeconds());
875 
876 }
877 
878 ////////////////////////////////////////////////////////////////////////////////
879 /// get all the MVA values for the events of the current Data type
880 std::vector<Double_t> TMVA::MethodBase::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
881 {
882 
884  if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
885  if (firstEvt < 0) firstEvt = 0;
886  std::vector<Double_t> values(lastEvt-firstEvt);
887  // log in case of looping on all the events
888  nEvents = values.size();
889 
890  // use timer
891  Timer timer( nEvents, GetName(), kTRUE );
892 
893  if (logProgress)
894  Log() << kHEADER<<Form("[%s] : ",DataInfo().GetName())<< "Evaluation of " << GetMethodName() << " on "
895  << (Data()->GetCurrentType()==Types::kTraining?"training":"testing") << " sample (" << nEvents << " events)" << Endl;
896 
897 
898  for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
899  Data()->SetCurrentEvent(ievt);
900  values[ievt] = GetMvaValue();
901 
902  // print progress
903  if (logProgress) {
904  Int_t modulo = Int_t(nEvents/100);
905  if (modulo <= 0 ) modulo = 1;
906  if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
907  }
908  }
909  if (logProgress) {
910  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
911  << "Elapsed time for evaluation of " << nEvents << " events: "
912  << timer.GetElapsedTime() << " " << Endl;
913  }
914 
915  return values;
916 }
917 
918 ////////////////////////////////////////////////////////////////////////////////
919 /// prepare tree branch with the method's discriminating variable
920 
922 {
923  Data()->SetCurrentType(type);
924 
925  ResultsClassification* mvaProb =
927 
929 
930  // use timer
931  Timer timer( nEvents, GetName(), kTRUE );
932 
933  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
934  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
935 
936  mvaProb->Resize( nEvents );
937  for (Int_t ievt=0; ievt<nEvents; ievt++) {
938 
939  Data()->SetCurrentEvent(ievt);
940  Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
941  if (proba < 0) break;
942  mvaProb->SetValue( proba, ievt );
943 
944  // print progress
945  Int_t modulo = Int_t(nEvents/100);
946  if (modulo <= 0 ) modulo = 1;
947  if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
948  }
949 
950  Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
951  << "Elapsed time for evaluation of " << nEvents << " events: "
952  << timer.GetElapsedTime() << " " << Endl;
953 }
954 
955 ////////////////////////////////////////////////////////////////////////////////
956 /// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
957 ///
958 /// bias = average deviation
959 /// dev = average absolute deviation
960 /// rms = rms of deviation
961 ///
962 
964  Double_t& dev, Double_t& devT,
965  Double_t& rms, Double_t& rmsT,
966  Double_t& mInf, Double_t& mInfT,
967  Double_t& corr,
969 {
970  Types::ETreeType savedType = Data()->GetCurrentType();
971  Data()->SetCurrentType(type);
972 
973  bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
974  Double_t sumw = 0;
975  Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
976  const Int_t nevt = GetNEvents();
977  Float_t* rV = new Float_t[nevt];
978  Float_t* tV = new Float_t[nevt];
979  Float_t* wV = new Float_t[nevt];
980  Float_t xmin = 1e30, xmax = -1e30;
981  for (Long64_t ievt=0; ievt<nevt; ievt++) {
982 
983  const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
984  Float_t t = ev->GetTarget(0);
985  Float_t w = ev->GetWeight();
987  Float_t d = (r-t);
988 
989  // find min/max
990  xmin = TMath::Min(xmin, TMath::Min(t, r));
991  xmax = TMath::Max(xmax, TMath::Max(t, r));
992 
993  // store for truncated RMS computation
994  rV[ievt] = r;
995  tV[ievt] = t;
996  wV[ievt] = w;
997 
998  // compute deviation-squared
999  sumw += w;
1000  bias += w * d;
1001  dev += w * TMath::Abs(d);
1002  rms += w * d * d;
1003 
1004  // compute correlation between target and regression estimate
1005  m1 += t*w; s1 += t*t*w;
1006  m2 += r*w; s2 += r*r*w;
1007  s12 += t*r;
1008  }
1009 
1010  // standard quantities
1011  bias /= sumw;
1012  dev /= sumw;
1013  rms /= sumw;
1014  rms = TMath::Sqrt(rms - bias*bias);
1015 
1016  // correlation
1017  m1 /= sumw;
1018  m2 /= sumw;
1019  corr = s12/sumw - m1*m2;
1020  corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1021 
1022  // create histogram required for computation of mutual information
1023  TH2F* hist = new TH2F( "hist", "hist", 150, xmin, xmax, 100, xmin, xmax );
1024  TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );
1025 
1026  // compute truncated RMS and fill histogram
1027  Double_t devMax = bias + 2*rms;
1028  Double_t devMin = bias - 2*rms;
1029  sumw = 0;
1030  int ic=0;
1031  for (Long64_t ievt=0; ievt<nevt; ievt++) {
1032  Float_t d = (rV[ievt] - tV[ievt]);
1033  hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
1034  if (d >= devMin && d <= devMax) {
1035  sumw += wV[ievt];
1036  biasT += wV[ievt] * d;
1037  devT += wV[ievt] * TMath::Abs(d);
1038  rmsT += wV[ievt] * d * d;
1039  histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
1040  ic++;
1041  }
1042  }
1043  biasT /= sumw;
1044  devT /= sumw;
1045  rmsT /= sumw;
1046  rmsT = TMath::Sqrt(rmsT - biasT*biasT);
1047  mInf = gTools().GetMutualInformation( *hist );
1048  mInfT = gTools().GetMutualInformation( *histT );
1049 
1050  delete hist;
1051  delete histT;
1052 
1053  delete [] rV;
1054  delete [] tV;
1055  delete [] wV;
1056 
1057  Data()->SetCurrentType(savedType);
1058 }
1059 
1060 
1061 ////////////////////////////////////////////////////////////////////////////////
1062 /// test multiclass classification
1063 
1065 {
1067  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1068  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test data..." << Endl;
1069  for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1070  resMulticlass->GetBestMultiClassCuts(icls);
1071  }
1072 }
1073 
1074 
1075 ////////////////////////////////////////////////////////////////////////////////
1076 /// initialization
1077 
1079 {
1081 
1082  ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
1084 
1085  // sanity checks: tree must exist, and theVar must be in tree
1086  if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
1087  Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
1088  << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
1089  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
1090  << " not found in tree" << Endl;
1091  }
1092 
1093  // basic statistics operations are made in base class
1096 
1097  // choose reasonable histogram ranges, by removing outliers
1098  Double_t nrms = 10;
1099  fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
1100  fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );
1101 
1102  // determine cut orientation
1104 
1105  // fill 2 types of histograms for the various analyses
1106  // this one is for actual plotting
1107 
1108  Double_t sxmax = fXmax+0.00001;
1109 
1110  // classifier response distributions for training sample
1111  // MVA plots used for graphics representation (signal)
1112  TString TestvarName;
1113  if(IsSilentFile())
1114  {
1115  TestvarName=Form("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1116  }else
1117  {
1118  TestvarName=GetTestvarName();
1119  }
1120  TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
1121  TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
1122  mvaRes->Store(mva_s, "MVA_S");
1123  mvaRes->Store(mva_b, "MVA_B");
1124  mva_s->Sumw2();
1125  mva_b->Sumw2();
1126 
1127  TH1* proba_s = 0;
1128  TH1* proba_b = 0;
1129  TH1* rarity_s = 0;
1130  TH1* rarity_b = 0;
1131  if (HasMVAPdfs()) {
1132  // P(MVA) plots used for graphics representation
1133  proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1134  proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1135  mvaRes->Store(proba_s, "Prob_S");
1136  mvaRes->Store(proba_b, "Prob_B");
1137  proba_s->Sumw2();
1138  proba_b->Sumw2();
1139 
1140  // R(MVA) plots used for graphics representation
1141  rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1142  rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1143  mvaRes->Store(rarity_s, "Rar_S");
1144  mvaRes->Store(rarity_b, "Rar_B");
1145  rarity_s->Sumw2();
1146  rarity_b->Sumw2();
1147  }
1148 
1149  // MVA plots used for efficiency calculations (large number of bins)
1150  TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
1151  TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
1152  mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
1153  mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
1154  mva_eff_s->Sumw2();
1155  mva_eff_b->Sumw2();
1156 
1157  // fill the histograms
1158 
1159  ResultsClassification* mvaProb = dynamic_cast<ResultsClassification*>
1161 
1162  Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
1163  if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
1164  std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();
1165 
1166  //LM: this is needed to avoid crashes in ROOCCURVE
1167  if ( mvaRes->GetSize() != GetNEvents() ) {
1168  Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
1169  assert(mvaRes->GetSize() == GetNEvents());
1170  }
1171 
1172  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1173 
1174  const Event* ev = GetEvent(ievt);
1175  Float_t v = (*mvaRes)[ievt][0];
1176  Float_t w = ev->GetWeight();
1177 
1178  if (DataInfo().IsSignal(ev)) {
1179  mvaResTypes->push_back(kTRUE);
1180  mva_s ->Fill( v, w );
1181  if (mvaProb) {
1182  proba_s->Fill( (*mvaProb)[ievt][0], w );
1183  rarity_s->Fill( GetRarity( v ), w );
1184  }
1185 
1186  mva_eff_s ->Fill( v, w );
1187  }
1188  else {
1189  mvaResTypes->push_back(kFALSE);
1190  mva_b ->Fill( v, w );
1191  if (mvaProb) {
1192  proba_b->Fill( (*mvaProb)[ievt][0], w );
1193  rarity_b->Fill( GetRarity( v ), w );
1194  }
1195  mva_eff_b ->Fill( v, w );
1196  }
1197  }
1198 
1199  // comment out those (and several others) if you want unnormalized output
1200  gTools().NormHist( mva_s );
1201  gTools().NormHist( mva_b );
1202  gTools().NormHist( proba_s );
1203  gTools().NormHist( proba_b );
1204  gTools().NormHist( rarity_s );
1205  gTools().NormHist( rarity_b );
1206  gTools().NormHist( mva_eff_s );
1207  gTools().NormHist( mva_eff_b );
1208 
1209  // create PDFs from histograms, using default splines, and no additional smoothing
1210  if (fSplS) { delete fSplS; fSplS = 0; }
1211  if (fSplB) { delete fSplB; fSplB = 0; }
1212  fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
1213  fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
1214 }
1215 
1216 ////////////////////////////////////////////////////////////////////////////////
1217 /// general method used in writing the header of the weight files where
1218 /// the used variables, variable transformation type etc. is specified
1219 
1220 void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1221 {
1222  TString prefix = "";
1223  UserGroup_t * userInfo = gSystem->GetUserInfo();
1224 
1225  tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1226  tf << prefix << "Method : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1227  tf.setf(std::ios::left);
1228  tf << prefix << "TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
1229  << GetTrainingTMVAVersionCode() << "]" << std::endl;
1230  tf << prefix << "ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
1231  << GetTrainingROOTVersionCode() << "]" << std::endl;
1232  tf << prefix << "Creator : " << userInfo->fUser << std::endl;
1233  tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1234  tf << prefix << "Host : " << gSystem->GetBuildNode() << std::endl;
1235  tf << prefix << "Dir : " << gSystem->WorkingDirectory() << std::endl;
1236  tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1237 
1238  TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1239 
1240  tf << prefix << "Analysis type : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1241  tf << prefix << std::endl;
1242 
1243  delete userInfo;
1244 
1245  // First write all options
1246  tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1247  WriteOptionsToStream( tf, prefix );
1248  tf << prefix << std::endl;
1249 
1250  // Second write variable info
1251  tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1252  WriteVarsToStream( tf, prefix );
1253  tf << prefix << std::endl;
1254 }
1255 
1256 ////////////////////////////////////////////////////////////////////////////////
1257 /// xml writing
1258 
1259 void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1260 {
1261  void* it = gTools().AddChild(gi,"Info");
1262  gTools().AddAttr(it,"name", name);
1263  gTools().AddAttr(it,"value", value);
1264 }
1265 
1266 ////////////////////////////////////////////////////////////////////////////////
1267 
1269  if (analysisType == Types::kRegression) {
1270  AddRegressionOutput( type );
1271  } else if (analysisType == Types::kMulticlass) {
1272  AddMulticlassOutput( type );
1273  } else {
1274  AddClassifierOutput( type );
1275  if (HasMVAPdfs())
1276  AddClassifierOutputProb( type );
1277  }
1278 }
1279 
1280 ////////////////////////////////////////////////////////////////////////////////
1281 /// general method used in writing the header of the weight files where
1282 /// the used variables, variable transformation type etc. is specified
1283 
1284 void TMVA::MethodBase::WriteStateToXML( void* parent ) const
1285 {
1286  if (!parent) return;
1287 
1288  UserGroup_t* userInfo = gSystem->GetUserInfo();
1289 
1290  void* gi = gTools().AddChild(parent, "GeneralInfo");
1291  AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
1292  AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
1293  AddInfoItem( gi, "Creator", userInfo->fUser);
1294  TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
1295  AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
1296  AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
1297  AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
1298  AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));
1299 
1300  Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
1301  TString analysisType((aType==Types::kRegression) ? "Regression" :
1302  (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
1303  AddInfoItem( gi, "AnalysisType", analysisType );
1304  delete userInfo;
1305 
1306  // write options
1307  AddOptionsXMLTo( parent );
1308 
1309  // write variable info
1310  AddVarsXMLTo( parent );
1311 
1312  // write spectator info
1313  if (fModelPersistence)
1314  AddSpectatorsXMLTo( parent );
1315 
1316  // write class info if in multiclass mode
1317  AddClassesXMLTo(parent);
1318 
1319  // write target info if in regression mode
1320  if (DoRegression()) AddTargetsXMLTo(parent);
1321 
1322  // write transformations
1323  GetTransformationHandler(false).AddXMLTo( parent );
1324 
1325  // write MVA variable distributions
1326  void* pdfs = gTools().AddChild(parent, "MVAPdfs");
1327  if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1328  if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1329 
1330  // write weights
1331  AddWeightsXMLTo( parent );
1332 }
1333 
1334 ////////////////////////////////////////////////////////////////////////////////
1335 /// read reference MVA distributions (and other information)
1336 /// from a ROOT type weight file
1337 
1339 {
1340  Bool_t addDirStatus = TH1::AddDirectoryStatus();
1341  TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
1342  fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1343  fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1344 
1345  TH1::AddDirectory( addDirStatus );
1346 
1347  ReadWeightsFromStream( rf );
1348 
1349  SetTestvarName();
1350 }
1351 
1352 ////////////////////////////////////////////////////////////////////////////////
1353 /// write options and weights to file
1354 /// note that each one text file for the main configuration information
1355 /// and one ROOT file for ROOT objects are created
1356 
1358 {
1359  // ---- create the text file
1360  TString tfname( GetWeightFileName() );
1361 
1362  // writing xml file
1363  TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1364  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1365  << "Creating xml weight file: "
1366  << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1367  void* doc = gTools().xmlengine().NewDoc();
1368  void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1369  gTools().xmlengine().DocSetRootElement(doc,rootnode);
1370  gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1371  WriteStateToXML(rootnode);
1372  gTools().xmlengine().SaveDoc(doc,xmlfname);
1373  gTools().xmlengine().FreeDoc(doc);
1374 }
1375 
1376 ////////////////////////////////////////////////////////////////////////////////
1377 /// Function to read options and weights from file
1378 
1380 {
1381  // get the filename
1382 
1383  TString tfname(GetWeightFileName());
1384 
1385  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1386  << "Reading weight file: "
1387  << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1388 
1389  if (tfname.EndsWith(".xml") ) {
1390 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,29,0)
1391  void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1392 #else
1393  void* doc = gTools().xmlengine().ParseFile(tfname);
1394 #endif
1395  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1396  ReadStateFromXML(rootnode);
1397  gTools().xmlengine().FreeDoc(doc);
1398  }
1399  else {
1400  std::filebuf fb;
1401  fb.open(tfname.Data(),std::ios::in);
1402  if (!fb.is_open()) { // file not found --> Error
1403  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1404  << "Unable to open input weight file: " << tfname << Endl;
1405  }
1406  std::istream fin(&fb);
1407  ReadStateFromStream(fin);
1408  fb.close();
1409  }
1410  if (!fTxtWeightsOnly) {
1411  // ---- read the ROOT file
1412  TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1413  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1414  << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1415  TFile* rfile = TFile::Open( rfname, "READ" );
1416  ReadStateFromStream( *rfile );
1417  rfile->Close();
1418  }
1419 }
1420 ////////////////////////////////////////////////////////////////////////////////
1421 /// for reading from memory
1422 
1423 void TMVA::MethodBase::ReadStateFromXMLString( const char* xmlstr ) {
1424 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,26,00)
1425  void* doc = gTools().xmlengine().ParseString(xmlstr);
1426  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1427  ReadStateFromXML(rootnode);
1428  gTools().xmlengine().FreeDoc(doc);
1429 #else
1430  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Method MethodBase::ReadStateFromXMLString( const char* xmlstr = "
1431  << xmlstr << " ) is not available for ROOT versions prior to 5.26/00." << Endl;
1432 #endif
1433 
1434  return;
1435 }
1436 
1437 ////////////////////////////////////////////////////////////////////////////////
1438 
1439 void TMVA::MethodBase::ReadStateFromXML( void* methodNode )
1440 {
1441  TString fullMethodName;
1442  gTools().ReadAttr( methodNode, "Method", fullMethodName );
1443  fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1444 
1445  // update logger
1446  Log().SetSource( GetName() );
1447  Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1448  << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1449 
1450  // after the method name is read, the testvar can be set
1451  SetTestvarName();
1452 
1453  TString nodeName("");
1454  void* ch = gTools().GetChild(methodNode);
1455  while (ch!=0) {
1456  nodeName = TString( gTools().GetName(ch) );
1457 
1458  if (nodeName=="GeneralInfo") {
1459  // read analysis type
1460 
1461  TString name(""),val("");
1462  void* antypeNode = gTools().GetChild(ch);
1463  while (antypeNode) {
1464  gTools().ReadAttr( antypeNode, "name", name );
1465 
1466  if (name == "TrainingTime")
1467  gTools().ReadAttr( antypeNode, "value", fTrainTime );
1468 
1469  if (name == "AnalysisType") {
1470  gTools().ReadAttr( antypeNode, "value", val );
1471  val.ToLower();
1472  if (val == "regression" ) SetAnalysisType( Types::kRegression );
1473  else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1474  else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1475  else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1476  }
1477 
1478  if (name == "TMVA Release" || name == "TMVA") {
1479  TString s;
1480  gTools().ReadAttr( antypeNode, "value", s);
1481  fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1482  Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1483  }
1484 
1485  if (name == "ROOT Release" || name == "ROOT") {
1486  TString s;
1487  gTools().ReadAttr( antypeNode, "value", s);
1488  fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1489  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1490  << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1491  }
1492  antypeNode = gTools().GetNextChild(antypeNode);
1493  }
1494  }
1495  else if (nodeName=="Options") {
1496  ReadOptionsFromXML(ch);
1497  ParseOptions();
1498 
1499  }
1500  else if (nodeName=="Variables") {
1502  }
1503  else if (nodeName=="Spectators") {
1505  }
1506  else if (nodeName=="Classes") {
1507  if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1508  }
1509  else if (nodeName=="Targets") {
1510  if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1511  }
1512  else if (nodeName=="Transformations") {
1514  }
1515  else if (nodeName=="MVAPdfs") {
1516  TString pdfname;
1517  if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1518  if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1519  void* pdfnode = gTools().GetChild(ch);
1520  if (pdfnode) {
1521  gTools().ReadAttr(pdfnode, "Name", pdfname);
1522  fMVAPdfS = new PDF(pdfname);
1523  fMVAPdfS->ReadXML(pdfnode);
1524  pdfnode = gTools().GetNextChild(pdfnode);
1525  gTools().ReadAttr(pdfnode, "Name", pdfname);
1526  fMVAPdfB = new PDF(pdfname);
1527  fMVAPdfB->ReadXML(pdfnode);
1528  }
1529  }
1530  else if (nodeName=="Weights") {
1531  ReadWeightsFromXML(ch);
1532  }
1533  else {
1534  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1535  }
1536  ch = gTools().GetNextChild(ch);
1537 
1538  }
1539 
1540  // update transformation handler
1541  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1542 }
1543 
1544 ////////////////////////////////////////////////////////////////////////////////
1545 /// read the header from the weight files of the different MVA methods
1546 
1547 void TMVA::MethodBase::ReadStateFromStream( std::istream& fin )
1548 {
1549  char buf[512];
1550 
1551  // when reading from stream, we assume the files are produced with TMVA<=397
1553 
1554 
1555  // first read the method name
1556  GetLine(fin,buf);
1557  while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1558  TString namestr(buf);
1559 
1560  TString methodType = namestr(0,namestr.Index("::"));
1561  methodType = methodType(methodType.Last(' '),methodType.Length());
1562  methodType = methodType.Strip(TString::kLeading);
1563 
1564  TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1565  methodName = methodName.Strip(TString::kLeading);
1566  if (methodName == "") methodName = methodType;
1567  fMethodName = methodName;
1568 
1569  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1570 
1571  // update logger
1572  Log().SetSource( GetName() );
1573 
1574  // now the question is whether to read the variables first or the options (well, of course the order
1575  // of writing them needs to agree)
1576  //
1577  // the option "Decorrelation" is needed to decide if the variables we
1578  // read are decorrelated or not
1579  //
1580  // the variables are needed by some methods (TMLP) to build the NN
1581  // which is done in ProcessOptions so for the time being we first Read and Parse the options then
1582  // we read the variables, and then we process the options
1583 
1584  // now read all options
1585  GetLine(fin,buf);
1586  while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1587  ReadOptionsFromStream(fin);
1588  ParseOptions();
1589 
1590  // Now read variable info
1591  fin.getline(buf,512);
1592  while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1593  ReadVarsFromStream(fin);
1594 
1595  // now we process the options (of the derived class)
1596  ProcessOptions();
1597 
1598  if (IsNormalised()) {
1601  norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1602  }
1603  VariableTransformBase *varTrafo(0), *varTrafo2(0);
1604  if ( fVarTransformString == "None") {
1605  if (fUseDecorr)
1607  } else if ( fVarTransformString == "Decorrelate" ) {
1609  } else if ( fVarTransformString == "PCA" ) {
1611  } else if ( fVarTransformString == "Uniform" ) {
1612  varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1613  } else if ( fVarTransformString == "Gauss" ) {
1615  } else if ( fVarTransformString == "GaussDecorr" ) {
1618  } else {
1619  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1620  << fVarTransformString << "' unknown." << Endl;
1621  }
1622  // Now read decorrelation matrix if available
1623  if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1624  fin.getline(buf,512);
1625  while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1626  if (varTrafo) {
1628  varTrafo->ReadTransformationFromStream(fin, trafo );
1629  }
1630  if (varTrafo2) {
1632  varTrafo2->ReadTransformationFromStream(fin, trafo );
1633  }
1634  }
1635 
1636 
1637  if (HasMVAPdfs()) {
1638  // Now read the MVA PDFs
1639  fin.getline(buf,512);
1640  while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1641  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1642  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1643  fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1644  fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1647 
1648  fin >> *fMVAPdfS;
1649  fin >> *fMVAPdfB;
1650  }
1651 
1652  // Now read weights
1653  fin.getline(buf,512);
1654  while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1655  fin.getline(buf,512);
1656  ReadWeightsFromStream( fin );;
1657 
1658  // update transformation handler
1659  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1660 
1661 }
1662 
1663 ////////////////////////////////////////////////////////////////////////////////
1664 /// write the list of variables (name, min, max) for a given data
1665 /// transformation method to the stream
1666 
1667 void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1668 {
1669  o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1670  std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1671  for (; varIt!=DataInfo().GetVariableInfos().end(); varIt++) { o << prefix; varIt->WriteToStream(o); }
1672  o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1673  varIt = DataInfo().GetSpectatorInfos().begin();
1674  for (; varIt!=DataInfo().GetSpectatorInfos().end(); varIt++) { o << prefix; varIt->WriteToStream(o); }
1675 }
1676 
1677 ////////////////////////////////////////////////////////////////////////////////
1678 /// Read the variables (name, min, max) for a given data
1679 /// transformation method from the stream. In the stream we only
1680 /// expect the limits which will be set
1681 
1682 void TMVA::MethodBase::ReadVarsFromStream( std::istream& istr )
1683 {
1684  TString dummy;
1685  UInt_t readNVar;
1686  istr >> dummy >> readNVar;
1687 
1688  if (readNVar!=DataInfo().GetNVariables()) {
1689  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1690  << " while there are " << readNVar << " variables declared in the file"
1691  << Endl;
1692  }
1693 
1694  // we want to make sure all variables are read in the order they are defined
1695  VariableInfo varInfo;
1696  std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1697  int varIdx = 0;
1698  for (; varIt!=DataInfo().GetVariableInfos().end(); varIt++, varIdx++) {
1699  varInfo.ReadFromStream(istr);
1700  if (varIt->GetExpression() == varInfo.GetExpression()) {
1701  varInfo.SetExternalLink((*varIt).GetExternalLink());
1702  (*varIt) = varInfo;
1703  }
1704  else {
1705  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1706  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1707  Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1708  Log() << kINFO << "the correct working of the method):" << Endl;
1709  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1710  Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1711  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1712  }
1713  }
1714 }
1715 
1716 ////////////////////////////////////////////////////////////////////////////////
1717 /// write variable info to XML
1718 
1719 void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1720 {
1721  void* vars = gTools().AddChild(parent, "Variables");
1722  gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1723 
1724  for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1725  VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1726  void* var = gTools().AddChild( vars, "Variable" );
1727  gTools().AddAttr( var, "VarIndex", idx );
1728  vi.AddToXML( var );
1729  }
1730 }
1731 
1732 ////////////////////////////////////////////////////////////////////////////////
1733 /// write spectator info to XML
1734 
1735 void TMVA::MethodBase::AddSpectatorsXMLTo( void* parent ) const
1736 {
1737  void* specs = gTools().AddChild(parent, "Spectators");
1738 
1739  UInt_t writeIdx=0;
1740  for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1741 
1742  VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1743 
1744  // we do not want to write spectators that are category-cuts,
1745  // except if the method is the category method and the spectators belong to it
1746  if (vi.GetVarType()=='C') continue;
1747 
1748  void* spec = gTools().AddChild( specs, "Spectator" );
1749  gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1750  vi.AddToXML( spec );
1751  }
1752  gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1753 }
1754 
1755 ////////////////////////////////////////////////////////////////////////////////
1756 /// write class info to XML
1757 
1758 void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1759 {
1760  UInt_t nClasses=DataInfo().GetNClasses();
1761 
1762  void* classes = gTools().AddChild(parent, "Classes");
1763  gTools().AddAttr( classes, "NClass", nClasses );
1764 
1765  for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1766  ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1767  TString className =classInfo->GetName();
1768  UInt_t classNumber=classInfo->GetNumber();
1769 
1770  void* classNode=gTools().AddChild(classes, "Class");
1771  gTools().AddAttr( classNode, "Name", className );
1772  gTools().AddAttr( classNode, "Index", classNumber );
1773  }
1774 }
1775 ////////////////////////////////////////////////////////////////////////////////
1776 /// write target info to XML
1777 
1778 void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1779 {
1780  void* targets = gTools().AddChild(parent, "Targets");
1781  gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1782 
1783  for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1784  VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1785  void* tar = gTools().AddChild( targets, "Target" );
1786  gTools().AddAttr( tar, "TargetIndex", idx );
1787  vi.AddToXML( tar );
1788  }
1789 }
1790 
1791 ////////////////////////////////////////////////////////////////////////////////
1792 /// read variable info from XML
1793 
1795 {
1796  UInt_t readNVar;
1797  gTools().ReadAttr( varnode, "NVar", readNVar);
1798 
1799  if (readNVar!=DataInfo().GetNVariables()) {
1800  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1801  << " while there are " << readNVar << " variables declared in the file"
1802  << Endl;
1803  }
1804 
1805  // we want to make sure all variables are read in the order they are defined
1806  VariableInfo readVarInfo, existingVarInfo;
1807  int varIdx = 0;
1808  void* ch = gTools().GetChild(varnode);
1809  while (ch) {
1810  gTools().ReadAttr( ch, "VarIndex", varIdx);
1811  existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1812  readVarInfo.ReadFromXML(ch);
1813 
1814  if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1815  readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
1816  existingVarInfo = readVarInfo;
1817  }
1818  else {
1819  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1820  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1821  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1822  Log() << kINFO << "correct working of the method):" << Endl;
1823  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1824  Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1825  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1826  }
1827  ch = gTools().GetNextChild(ch);
1828  }
1829 }
1830 
1831 ////////////////////////////////////////////////////////////////////////////////
1832 /// read spectator info from XML
1833 
1835 {
1836  UInt_t readNSpec;
1837  gTools().ReadAttr( specnode, "NSpec", readNSpec);
1838 
1839  if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1840  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1841  << " while there are " << readNSpec << " spectators declared in the file"
1842  << Endl;
1843  }
1844 
1845  // we want to make sure all variables are read in the order they are defined
1846  VariableInfo readSpecInfo, existingSpecInfo;
1847  int specIdx = 0;
1848  void* ch = gTools().GetChild(specnode);
1849  while (ch) {
1850  gTools().ReadAttr( ch, "SpecIndex", specIdx);
1851  existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1852  readSpecInfo.ReadFromXML(ch);
1853 
1854  if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1855  readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
1856  existingSpecInfo = readSpecInfo;
1857  }
1858  else {
1859  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1860  Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1861  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1862  Log() << kINFO << "correct working of the method):" << Endl;
1863  Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1864  Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1865  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1866  }
1867  ch = gTools().GetNextChild(ch);
1868  }
1869 }
1870 
1871 ////////////////////////////////////////////////////////////////////////////////
1872 /// read number of classes from XML
1873 
1875 {
1876  UInt_t readNCls;
1877  // coverity[tainted_data_argument]
1878  gTools().ReadAttr( clsnode, "NClass", readNCls);
1879 
1880  TString className="";
1881  UInt_t classIndex=0;
1882  void* ch = gTools().GetChild(clsnode);
1883  if (!ch) {
1884  for (UInt_t icls = 0; icls<readNCls;++icls) {
1885  TString classname = Form("class%i",icls);
1886  DataInfo().AddClass(classname);
1887 
1888  }
1889  }
1890  else{
1891  while (ch) {
1892  gTools().ReadAttr( ch, "Index", classIndex);
1893  gTools().ReadAttr( ch, "Name", className );
1894  DataInfo().AddClass(className);
1895 
1896  ch = gTools().GetNextChild(ch);
1897  }
1898  }
1899 
1900  // retrieve signal and background class index
1901  if (DataInfo().GetClassInfo("Signal") != 0) {
1902  fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1903  }
1904  else
1905  fSignalClass=0;
1906  if (DataInfo().GetClassInfo("Background") != 0) {
1907  fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
1908  }
1909  else
1910  fBackgroundClass=1;
1911 }
1912 
1913 ////////////////////////////////////////////////////////////////////////////////
1914 /// read target info from XML
1915 
1917 {
1918  UInt_t readNTar;
1919  gTools().ReadAttr( tarnode, "NTrgt", readNTar);
1920 
1921  int tarIdx = 0;
1922  TString expression;
1923  void* ch = gTools().GetChild(tarnode);
1924  while (ch) {
1925  gTools().ReadAttr( ch, "TargetIndex", tarIdx);
1926  gTools().ReadAttr( ch, "Expression", expression);
1927  DataInfo().AddTarget(expression,"","",0,0);
1928 
1929  ch = gTools().GetNextChild(ch);
1930  }
1931 }
1932 
1933 ////////////////////////////////////////////////////////////////////////////////
1934 /// returns the ROOT directory where info/histograms etc of the
1935 /// corresponding MVA method instance are stored
1936 
1938 {
1939  if (fBaseDir != 0) return fBaseDir;
1940  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
1941 
1942  TDirectory* methodDir = MethodBaseDir();
1943  if (methodDir==0)
1944  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" << Endl;
1945 
1946  TString defaultDir = GetMethodName();
1947  TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
1948  if(!sdir)
1949  {
1950  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet--> created it" <<Endl;
1951  sdir = methodDir->mkdir(defaultDir);
1952  sdir->cd();
1953  // write weight file name into target file
1954  TObjString wfilePath( gSystem->WorkingDirectory() );
1955  TObjString wfileName( GetWeightFileName() );
1956  wfilePath.Write( "TrainingPath" );
1957  wfileName.Write( "WeightFileName" );
1958  }
1959 
1960  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
1961  return sdir;
1962 }
1963 
1964 ////////////////////////////////////////////////////////////////////////////////
1965 /// returns the ROOT directory where all instances of the
1966 /// corresponding MVA method are stored
1967 
1969  {
1970  if (fMethodBaseDir != 0) return fMethodBaseDir;
1971 
1972  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " not set yet --> check if already there.." <<Endl;
1973 
1974 
1975  TDirectory *fFactoryBaseDir=GetFile();
1976 
1977  fMethodBaseDir = fFactoryBaseDir->GetDirectory(DataInfo().GetName());
1978  if(!fMethodBaseDir) //creating dataset directory
1979  {
1980  fMethodBaseDir = fFactoryBaseDir->mkdir(DataInfo().GetName(),Form("Base directory for dataset %s",DataInfo().GetName()));
1981  if(!fMethodBaseDir)Log()<<kFATAL<<"Can not create dir "<<DataInfo().GetName();
1982  }
1983  TString _methodDir = Form("Method_%s",GetMethodName().Data());
1985 
1986  if(!fMethodBaseDir){
1987  fMethodBaseDir = fFactoryBaseDir->GetDirectory(DataInfo().GetName())->mkdir(_methodDir.Data(),Form("Directory for all %s methods", GetMethodTypeName().Data()));
1988  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " does not exist yet--> created it" <<Endl;
1989  }
1990 
1991  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<"Return from MethodBaseDir() after creating base directory "<<Endl;
1992  return fMethodBaseDir;
1993 }
1994 
1995 ////////////////////////////////////////////////////////////////////////////////
1996 /// set directory of weight file
1997 
1999 {
2000  fFileDir = fileDir;
2002 }
2003 
2004 ////////////////////////////////////////////////////////////////////////////////
2005 /// set the weight file name (deprecated)
2006 
2008 {
2009  fWeightFile = theWeightFile;
2010 }
2011 
2012 ////////////////////////////////////////////////////////////////////////////////
2013 /// retrieve weight file name
2014 
2016 {
2017  if (fWeightFile!="") return fWeightFile;
2018 
2019  // the default consists of
2020  // directory/jobname_methodname_suffix.extension.{root/txt}
2021  TString suffix = "";
2022  TString wFileDir(GetWeightFileDir());
2023  return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2024  + GetJobName() + "_" + GetMethodName() +
2025  suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml" );
2026 }
2027 
2028 ////////////////////////////////////////////////////////////////////////////////
2029 /// writes all MVA evaluation histograms to file
2030 
2032 {
2033  BaseDir()->cd();
2034 
2035 
2036  // write MVA PDFs to file - if exist
2037  if (0 != fMVAPdfS) {
2040  fMVAPdfS->GetPDFHist()->Write();
2041  }
2042  if (0 != fMVAPdfB) {
2045  fMVAPdfB->GetPDFHist()->Write();
2046  }
2047 
2048  // write result-histograms
2049  Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2050  if (!results)
2051  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2052  << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2053  << "/kMaxAnalysisType" << Endl;
2054  results->GetStorage()->Write();
2055  if (treetype==Types::kTesting) {
2057  }
2058 }
2059 
2060 ////////////////////////////////////////////////////////////////////////////////
2061 /// write special monitoring histograms to file
2062 /// dummy implementation here -----------------
2063 
2065 {
2066 }
2067 
2068 ////////////////////////////////////////////////////////////////////////////////
2069 /// reads one line from the input stream
2070 /// checks for certain keywords and interprets
2071 /// the line if keywords are found
2072 
2073 Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2074 {
2075  fin.getline(buf,512);
2076  TString line(buf);
2077  if (line.BeginsWith("TMVA Release")) {
2078  Ssiz_t start = line.First('[')+1;
2079  Ssiz_t length = line.Index("]",start)-start;
2080  TString code = line(start,length);
2081  std::stringstream s(code.Data());
2082  s >> fTMVATrainingVersion;
2083  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2084  }
2085  if (line.BeginsWith("ROOT Release")) {
2086  Ssiz_t start = line.First('[')+1;
2087  Ssiz_t length = line.Index("]",start)-start;
2088  TString code = line(start,length);
2089  std::stringstream s(code.Data());
2090  s >> fROOTTrainingVersion;
2091  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2092  }
2093  if (line.BeginsWith("Analysis type")) {
2094  Ssiz_t start = line.First('[')+1;
2095  Ssiz_t length = line.Index("]",start)-start;
2096  TString code = line(start,length);
2097  std::stringstream s(code.Data());
2098  std::string analysisType;
2099  s >> analysisType;
2100  if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2101  else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2102  else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2103  else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2104 
2105  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2106  << (GetAnalysisType() == Types::kRegression ? "Regression" :
2107  (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2108  }
2109 
2110  return true;
2111 }
2112 
2113 ////////////////////////////////////////////////////////////////////////////////
2114 /// Create PDFs of the MVA output variables
2115 
2117 {
2119 
2120  // the PDF's are stored as results ONLY if the corresponding "results" are booked,
2121  // otherwise they will be only used 'online'
2122  ResultsClassification * mvaRes = dynamic_cast<ResultsClassification*>
2124 
2125  if (mvaRes==0 || mvaRes->GetSize()==0) {
2126  Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2127  }
2128 
2129  Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2130  Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2131 
2132  // create histograms that serve as basis to create the MVA Pdfs
2133  TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2134  fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2135  TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2136  fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2137 
2138 
2139  // compute sum of weights properly
2140  histMVAPdfS->Sumw2();
2141  histMVAPdfB->Sumw2();
2142 
2143  // fill histograms
2144  for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2145  Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2146  Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2147 
2148  if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2149  else histMVAPdfB->Fill( theVal, theWeight );
2150  }
2151 
2152  gTools().NormHist( histMVAPdfS );
2153  gTools().NormHist( histMVAPdfB );
2154 
2155  // momentary hack for ROOT problem
2156  if(!IsSilentFile())
2157  {
2158  histMVAPdfS->Write();
2159  histMVAPdfB->Write();
2160  }
2161  // create PDFs
2162  fMVAPdfS->BuildPDF ( histMVAPdfS );
2163  fMVAPdfB->BuildPDF ( histMVAPdfB );
2164  fMVAPdfS->ValidatePDF( histMVAPdfS );
2165  fMVAPdfB->ValidatePDF( histMVAPdfB );
2166 
2167  if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2168  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2169  << Form( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2170  GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2171  << Endl;
2172  }
2173 
2174  delete histMVAPdfS;
2175  delete histMVAPdfB;
2176 }
2177 
2179  // the simple one, automatically calcualtes the mvaVal and uses the
2180  // SAME sig/bkg ratio as given in the training sample (typically 50/50
2181  // .. (NormMode=EqualNumEvents) but can be different)
2182  if (!fMVAPdfS || !fMVAPdfB) {
2183  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
2184  CreateMVAPdfs();
2185  }
2187  Double_t mvaVal = GetMvaValue(ev);
2188 
2189  return GetProba(mvaVal,sigFraction);
2190 
2191 }
2192 ////////////////////////////////////////////////////////////////////////////////
2193 /// compute likelihood ratio
2194 
2196 {
2197  if (!fMVAPdfS || !fMVAPdfB) {
2198  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2199  return -1.0;
2200  }
2201  Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2202  Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2203 
2204  Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2205 
2206  return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2207 }
2208 
2209 ////////////////////////////////////////////////////////////////////////////////
2210 /// compute rarity:
2211 /// R(x) = Integrate_[-oo..x] { PDF(x') dx' }
2212 /// where PDF(x) is the PDF of the classifier's signal or background distribution
2213 
2215 {
2216  if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2217  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Backgroud does not exist: "
2218  << "select option \"CreateMVAPdfs\"" << Endl;
2219  return 0.0;
2220  }
2221 
2222  PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2223 
2224  return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2225 }
2226 
2227 ////////////////////////////////////////////////////////////////////////////////
2228 /// fill background efficiency (resp. rejection) versus signal efficiency plots
2229 /// returns signal efficiency at background efficiency indicated in theString
2230 
2232 {
2233  Data()->SetCurrentType(type);
2234  Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
2235  std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2236 
2237  // parse input string for required background efficiency
2238  TList* list = gTools().ParseFormatLine( theString );
2239 
2240  // sanity check
2241  Bool_t computeArea = kFALSE;
2242  if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2243  else if (list->GetSize() > 2) {
2244  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2245  << " in string: " << theString
2246  << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2247  delete list;
2248  return -1;
2249  }
2250 
2251  // sanity check
2252  if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2253  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2254  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2255  delete list;
2256  return -1.0;
2257  }
2258 
2259  // create histograms
2260 
2261  // first, get efficiency histograms for signal and background
2262  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2263  Double_t xmin = effhist->GetXaxis()->GetXmin();
2264  Double_t xmax = effhist->GetXaxis()->GetXmax();
2265 
2266  TTHREAD_TLS(Double_t) nevtS;
2267 
2268  // first round ? --> create histograms
2269  if (results->DoesExist("MVA_EFF_S")==0) {
2270 
2271  // for efficiency plot
2272  TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2273  TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2274  results->Store(eff_s, "MVA_EFF_S");
2275  results->Store(eff_b, "MVA_EFF_B");
2276 
2277  // sign if cut
2278  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2279 
2280  // this method is unbinned
2281  nevtS = 0;
2282  for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2283 
2284  // read the tree
2285  Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2286  Float_t theWeight = GetEvent(ievt)->GetWeight();
2287  Float_t theVal = (*mvaRes)[ievt];
2288 
2289  // select histogram depending on if sig or bgd
2290  TH1* theHist = isSignal ? eff_s : eff_b;
2291 
2292  // count signal and background events in tree
2293  if (isSignal) nevtS+=theWeight;
2294 
2295  TAxis* axis = theHist->GetXaxis();
2296  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2297  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2298  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2299  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2300  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2301 
2302  if (sign > 0)
2303  for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2304  else if (sign < 0)
2305  for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2306  else
2307  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2308  }
2309 
2310  // renormalise maximum to <=1
2311  // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2312  // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2313 
2316 
2317  // background efficiency versus signal efficiency
2318  TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2319  results->Store(eff_BvsS, "MVA_EFF_BvsS");
2320  eff_BvsS->SetXTitle( "Signal eff" );
2321  eff_BvsS->SetYTitle( "Backgr eff" );
2322 
2323  // background rejection (=1-eff.) versus signal efficiency
2324  TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2325  results->Store(rej_BvsS);
2326  rej_BvsS->SetXTitle( "Signal eff" );
2327  rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2328 
2329  // inverse background eff (1/eff.) versus signal efficiency
2330  TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2331  GetTestvarName(), fNbins, 0, 1 );
2332  results->Store(inveff_BvsS);
2333  inveff_BvsS->SetXTitle( "Signal eff" );
2334  inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2335 
2336  // use root finder
2337  // spline background efficiency plot
2338  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2339  if (Use_Splines_for_Eff_) {
2340  fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2341  fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2342 
2343  // verify spline sanity
2344  gTools().CheckSplines( eff_s, fSplRefS );
2345  gTools().CheckSplines( eff_b, fSplRefB );
2346  }
2347 
2348  // make the background-vs-signal efficiency plot
2349 
2350  // create root finder
2351  RootFinder rootFinder( this, fXmin, fXmax );
2352 
2353  Double_t effB = 0;
2354  fEffS = eff_s; // to be set for the root finder
2355  for (Int_t bini=1; bini<=fNbins; bini++) {
2356 
2357  // find cut value corresponding to a given signal efficiency
2358  Double_t effS = eff_BvsS->GetBinCenter( bini );
2359  Double_t cut = rootFinder.Root( effS );
2360 
2361  // retrieve background efficiency for given cut
2362  if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2363  else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2364 
2365  // and fill histograms
2366  eff_BvsS->SetBinContent( bini, effB );
2367  rej_BvsS->SetBinContent( bini, 1.0-effB );
2369  inveff_BvsS->SetBinContent( bini, 1.0/effB );
2370  }
2371 
2372  // create splines for histogram
2373  fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2374 
2375  // search for overlap point where, when cutting on it,
2376  // one would obtain: eff_S = rej_B = 1 - eff_B
2377  Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2378  Int_t nbins_ = 5000;
2379  for (Int_t bini=1; bini<=nbins_; bini++) {
2380 
2381  // get corresponding signal and background efficiencies
2382  effS = (bini - 0.5)/Float_t(nbins_);
2383  rejB = 1.0 - fSpleffBvsS->Eval( effS );
2384 
2385  // find signal efficiency that corresponds to required background efficiency
2386  if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2387  effS_ = effS;
2388  rejB_ = rejB;
2389  }
2390 
2391  // find cut that corresponds to signal efficiency and update signal-like criterion
2392  Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2393  SetSignalReferenceCut( cut );
2394  fEffS = 0;
2395  }
2396 
2397  // must exist...
2398  if (0 == fSpleffBvsS) {
2399  delete list;
2400  return 0.0;
2401  }
2402 
2403  // now find signal efficiency that corresponds to required background efficiency
2404  Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2405  Int_t nbins_ = 1000;
2406 
2407  if (computeArea) {
2408 
2409  // compute area of rej-vs-eff plot
2410  Double_t integral = 0;
2411  for (Int_t bini=1; bini<=nbins_; bini++) {
2412 
2413  // get corresponding signal and background efficiencies
2414  effS = (bini - 0.5)/Float_t(nbins_);
2415  effB = fSpleffBvsS->Eval( effS );
2416  integral += (1.0 - effB);
2417  }
2418  integral /= nbins_;
2419 
2420  delete list;
2421  return integral;
2422  }
2423  else {
2424 
2425  // that will be the value of the efficiency retured (does not affect
2426  // the efficiency-vs-bkg plot which is done anyway.
2427  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2428 
2429  // find precise efficiency value
2430  for (Int_t bini=1; bini<=nbins_; bini++) {
2431 
2432  // get corresponding signal and background efficiencies
2433  effS = (bini - 0.5)/Float_t(nbins_);
2434  effB = fSpleffBvsS->Eval( effS );
2435 
2436  // find signal efficiency that corresponds to required background efficiency
2437  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2438  effS_ = effS;
2439  effB_ = effB;
2440  }
2441 
2442  // take mean between bin above and bin below
2443  effS = 0.5*(effS + effS_);
2444 
2445  effSerr = 0;
2446  if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2447 
2448  delete list;
2449  return effS;
2450  }
2451 
2452  return -1;
2453 }
2454 
2455 ////////////////////////////////////////////////////////////////////////////////
2456 
2458 {
2460 
2462 
2463  // fill background efficiency (resp. rejection) versus signal efficiency plots
2464  // returns signal efficiency at background efficiency indicated in theString
2465 
2466  // parse input string for required background efficiency
2467  TList* list = gTools().ParseFormatLine( theString );
2468  // sanity check
2469 
2470  if (list->GetSize() != 2) {
2471  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2472  << " in string: " << theString
2473  << " | required format, e.g., Efficiency:0.05" << Endl;
2474  delete list;
2475  return -1;
2476  }
2477  // that will be the value of the efficiency retured (does not affect
2478  // the efficiency-vs-bkg plot which is done anyway.
2479  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2480 
2481  delete list;
2482 
2483  // sanity check
2484  if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2485  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2486  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2487  << Endl;
2488  return -1.0;
2489  }
2490 
2491  // create histogram
2492 
2493  // first, get efficiency histograms for signal and background
2494  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2495  Double_t xmin = effhist->GetXaxis()->GetXmin();
2496  Double_t xmax = effhist->GetXaxis()->GetXmax();
2497 
2498  // first round ? --> create and fill histograms
2499  if (results->DoesExist("MVA_TRAIN_S")==0) {
2500 
2501  // classifier response distributions for test sample
2502  Double_t sxmax = fXmax+0.00001;
2503 
2504  // MVA plots on the training sample (check for overtraining)
2505  TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2506  TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2507  results->Store(mva_s_tr, "MVA_TRAIN_S");
2508  results->Store(mva_b_tr, "MVA_TRAIN_B");
2509  mva_s_tr->Sumw2();
2510  mva_b_tr->Sumw2();
2511 
2512  // Training efficiency plots
2513  TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2514  fNbinsH, xmin, xmax );
2515  TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2516  fNbinsH, xmin, xmax );
2517  results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2518  results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2519 
2520  // sign if cut
2521  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2522 
2523  std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2524  assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2525 
2526  // this method is unbinned
2527  for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2528 
2529  Data()->SetCurrentEvent(ievt);
2530  const Event* ev = GetEvent();
2531 
2532  Double_t theVal = mvaValues[ievt];
2533  Double_t theWeight = ev->GetWeight();
2534 
2535  TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2536  TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2537 
2538  theClsHist->Fill( theVal, theWeight );
2539 
2540  TAxis* axis = theEffHist->GetXaxis();
2541  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2542  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2543  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2544  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2545  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2546 
2547  if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2548  else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2549  }
2550 
2551  // normalise output distributions
2552  // comment out these calls (and several others) if you want unnormalized output
2553  gTools().NormHist( mva_s_tr );
2554  gTools().NormHist( mva_b_tr );
2555 
2556  // renormalise to maximum
2557  mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2558  mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2559 
2560  // Training background efficiency versus signal efficiency
2561  TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2562  // Training background rejection (=1-eff.) versus signal efficiency
2563  TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2564  results->Store(eff_bvss, "EFF_BVSS_TR");
2565  results->Store(rej_bvss, "REJ_BVSS_TR");
2566 
2567  // use root finder
2568  // spline background efficiency plot
2569  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2570  if (Use_Splines_for_Eff_) {
2571  if (fSplTrainRefS) delete fSplTrainRefS;
2572  if (fSplTrainRefB) delete fSplTrainRefB;
2573  fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2574  fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2575 
2576  // verify spline sanity
2577  gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2578  gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2579  }
2580 
2581  // make the background-vs-signal efficiency plot
2582 
2583  // create root finder
2584  RootFinder rootFinder(this, fXmin, fXmax );
2585 
2586  Double_t effB = 0;
2587  fEffS = results->GetHist("MVA_TRAINEFF_S");
2588  for (Int_t bini=1; bini<=fNbins; bini++) {
2589 
2590  // find cut value corresponding to a given signal efficiency
2591  Double_t effS = eff_bvss->GetBinCenter( bini );
2592 
2593  Double_t cut = rootFinder.Root( effS );
2594 
2595  // retrieve background efficiency for given cut
2596  if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2597  else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2598 
2599  // and fill histograms
2600  eff_bvss->SetBinContent( bini, effB );
2601  rej_bvss->SetBinContent( bini, 1.0-effB );
2602  }
2603  fEffS = 0;
2604 
2605  // create splines for histogram
2606  fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2607  }
2608 
2609  // must exist...
2610  if (0 == fSplTrainEffBvsS) return 0.0;
2611 
2612  // now find signal efficiency that corresponds to required background efficiency
2613  Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2614  Int_t nbins_ = 1000;
2615  for (Int_t bini=1; bini<=nbins_; bini++) {
2616 
2617  // get corresponding signal and background efficiencies
2618  effS = (bini - 0.5)/Float_t(nbins_);
2619  effB = fSplTrainEffBvsS->Eval( effS );
2620 
2621  // find signal efficiency that corresponds to required background efficiency
2622  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2623  effS_ = effS;
2624  effB_ = effB;
2625  }
2626 
2627  return 0.5*(effS + effS_); // the mean between bin above and bin below
2628 }
2629 
2630 //_______________________________________________________________________
2631 
2632 
2633 std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2634 {
2637  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2638 
2639  purity.push_back(resMulticlass->GetAchievablePur());
2640  return resMulticlass->GetAchievableEff();
2641 }
2642 
2643 //_______________________________________________________________________
2644 
2645 std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2646 {
2649  if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2650 
2651  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2652  for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2653  resMulticlass->GetBestMultiClassCuts(icls);
2654  }
2655 
2656  purity.push_back(resMulticlass->GetAchievablePur());
2657  return resMulticlass->GetAchievableEff();
2658 }
2659 
2660 
2661 ////////////////////////////////////////////////////////////////////////////////
2662 /// compute significance of mean difference
2663 /// significance = |<S> - <B>| / Sqrt(RMS_S^2 + RMS_B^2)
2664 
2666 {
2667  Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2668 
2669  return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2670 }
2671 
2672 ////////////////////////////////////////////////////////////////////////////////
2673 /// compute "separation" defined as
2674 /// <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx }
2675 
2677 {
2678  return gTools().GetSeparation( histoS, histoB );
2679 }
2680 
2681 ////////////////////////////////////////////////////////////////////////////////
2682 /// compute "separation" defined as
2683 /// <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx }
2684 
2686 {
2687  // note, if zero pointers given, use internal pdf
2688  // sanity check first
2689  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2690  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2691  if (!pdfS) pdfS = fSplS;
2692  if (!pdfB) pdfB = fSplB;
2693 
2694  if (!fSplS || !fSplB) {
2695  Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2696  << " fSplS or fSplB are not yet filled" << Endl;
2697  return 0;
2698  }else{
2699  return gTools().GetSeparation( *pdfS, *pdfB );
2700  }
2701 }
2702 
2703 ////////////////////////////////////////////////////////////////////////////////
2704 /// calculate the area (integral) under the ROC curve as a
2705 /// overall quality measure of the classification
2706 
2708 {
2709  // note, if zero pointers given, use internal pdf
2710  // sanity check first
2711  if ((!histS && histB) || (histS && !histB))
2712  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2713 
2714  if (histS==0 || histB==0) return 0.;
2715 
2716  TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2717  TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2718 
2719 
2720  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2721  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2722 
2723  Double_t integral = 0;
2724  UInt_t nsteps = 1000;
2725  Double_t step = (xmax-xmin)/Double_t(nsteps);
2726  Double_t cut = xmin;
2727  for (UInt_t i=0; i<nsteps; i++) {
2728  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2729  cut+=step;
2730  }
2731  return integral*step;
2732 }
2733 
2734 
2735 ////////////////////////////////////////////////////////////////////////////////
2736 /// calculate the area (integral) under the ROC curve as a
2737 /// overall quality measure of the classification
2738 
2740 {
2741  // note, if zero pointers given, use internal pdf
2742  // sanity check first
2743  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2744  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2745  if (!pdfS) pdfS = fSplS;
2746  if (!pdfB) pdfB = fSplB;
2747 
2748  if (pdfS==0 || pdfB==0) return 0.;
2749 
2750  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2751  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2752 
2753  Double_t integral = 0;
2754  UInt_t nsteps = 1000;
2755  Double_t step = (xmax-xmin)/Double_t(nsteps);
2756  Double_t cut = xmin;
2757  for (UInt_t i=0; i<nsteps; i++) {
2758  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2759  cut+=step;
2760  }
2761  return integral*step;
2762 }
2763 
2764 ////////////////////////////////////////////////////////////////////////////////
2765 /// compute the significance curve, S/Sqrt(S + B), for the given numbers
2766 /// of signal and background events; returns the cut value at maximum significance;
2767 /// the maximum significance itself is also returned via reference
2768 
2770  Double_t BackgroundEvents,
2771  Double_t& max_significance_value ) const
2772 {
2774 
2775  Double_t max_significance(0);
2776  Double_t effS(0),effB(0),significance(0);
2777  TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2778 
2779  if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2780  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2781  << "Number of signal or background events is <= 0 ==> abort"
2782  << Endl;
2783  }
2784 
2785  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2786  << SignalEvents/BackgroundEvents << Endl;
2787 
2788  TH1* eff_s = results->GetHist("MVA_EFF_S");
2789  TH1* eff_b = results->GetHist("MVA_EFF_B");
2790 
2791  if ( (eff_s==0) || (eff_b==0) ) {
2792  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2793  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2794  return 0;
2795  }
2796 
2797  for (Int_t bin=1; bin<=fNbinsH; bin++) {
2798  effS = eff_s->GetBinContent( bin );
2799  effB = eff_b->GetBinContent( bin );
2800 
2801  // put significance into a histogram
2802  significance = sqrt(SignalEvents)*( effS )/sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
2803 
2804  temp_histogram->SetBinContent(bin,significance);
2805  }
2806 
2807  // find maximum in histogram
2808  max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
2809  max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
2810 
2811  // delete
2812  delete temp_histogram;
2813 
2814  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
2815  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
2816 
2817  return max_significance;
2818 }
2819 
2820 ////////////////////////////////////////////////////////////////////////////////
2821 /// calculates rms,mean, xmin, xmax of the event variable
2822 /// this can be either done for the variables as they are or for
2823 /// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
2824 
2825 void TMVA::MethodBase::Statistics( Types::ETreeType treeType, const TString& theVarName,
2826  Double_t& meanS, Double_t& meanB,
2827  Double_t& rmsS, Double_t& rmsB,
2828  Double_t& xmin, Double_t& xmax )
2829 {
2830  Types::ETreeType previousTreeType = Data()->GetCurrentType();
2831  Data()->SetCurrentType(treeType);
2832 
2833  Long64_t entries = Data()->GetNEvents();
2834 
2835  // sanity check
2836  if (entries <=0)
2837  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;
2838 
2839  // index of the wanted variable
2840  UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
2841 
2842  // first fill signal and background in arrays before analysis
2843  xmin = +DBL_MAX;
2844  xmax = -DBL_MAX;
2845  Long64_t nEventsS = -1;
2846  Long64_t nEventsB = -1;
2847 
2848  // take into account event weights
2849  meanS = 0;
2850  meanB = 0;
2851  rmsS = 0;
2852  rmsB = 0;
2853  Double_t sumwS = 0, sumwB = 0;
2854 
2855  // loop over all training events
2856  for (Int_t ievt = 0; ievt < entries; ievt++) {
2857 
2858  const Event* ev = GetEvent(ievt);
2859 
2860  Double_t theVar = ev->GetValue(varIndex);
2861  Double_t weight = ev->GetWeight();
2862 
2863  if (DataInfo().IsSignal(ev)) {
2864  sumwS += weight;
2865  meanS += weight*theVar;
2866  rmsS += weight*theVar*theVar;
2867  }
2868  else {
2869  sumwB += weight;
2870  meanB += weight*theVar;
2871  rmsB += weight*theVar*theVar;
2872  }
2873  xmin = TMath::Min( xmin, theVar );
2874  xmax = TMath::Max( xmax, theVar );
2875  }
2876  ++nEventsS;
2877  ++nEventsB;
2878 
2879  meanS = meanS/sumwS;
2880  meanB = meanB/sumwB;
2881  rmsS = TMath::Sqrt( rmsS/sumwS - meanS*meanS );
2882  rmsB = TMath::Sqrt( rmsB/sumwB - meanB*meanB );
2883 
2884  Data()->SetCurrentType(previousTreeType);
2885 }
2886 
2887 ////////////////////////////////////////////////////////////////////////////////
2888 /// create reader class for method (classification only at present)
2889 
2890 void TMVA::MethodBase::MakeClass( const TString& theClassFileName ) const
2891 {
2892  // the default consists of
2893  TString classFileName = "";
2894  if (theClassFileName == "")
2895  classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
2896  else
2897  classFileName = theClassFileName;
2898 
2899  TString className = TString("Read") + GetMethodName();
2900 
2901  TString tfname( classFileName );
2902  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
2903  << "Creating standalone class: "
2904  << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
2905 
2906  std::ofstream fout( classFileName );
2907  if (!fout.good()) { // file could not be opened --> Error
2908  Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
2909  }
2910 
2911  // now create the class
2912  // preamble
2913  fout << "// Class: " << className << std::endl;
2914  fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
2915 
2916  // print general information and configuration state
2917  fout << std::endl;
2918  fout << "/* configuration options =====================================================" << std::endl << std::endl;
2919  WriteStateToStream( fout );
2920  fout << std::endl;
2921  fout << "============================================================================ */" << std::endl;
2922 
2923  // generate the class
2924  fout << "" << std::endl;
2925  fout << "#include <vector>" << std::endl;
2926  fout << "#include <cmath>" << std::endl;
2927  fout << "#include <string>" << std::endl;
2928  fout << "#include <iostream>" << std::endl;
2929  fout << "" << std::endl;
2930  // now if the classifier needs to write some additional classes for its response implementation
2931  // this code goes here (at least the header declarations need to come before the main class)
2932  this->MakeClassSpecificHeader( fout, className );
2933 
2934  fout << "#ifndef IClassifierReader__def" << std::endl;
2935  fout << "#define IClassifierReader__def" << std::endl;
2936  fout << std::endl;
2937  fout << "class IClassifierReader {" << std::endl;
2938  fout << std::endl;
2939  fout << " public:" << std::endl;
2940  fout << std::endl;
2941  fout << " // constructor" << std::endl;
2942  fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
2943  fout << " virtual ~IClassifierReader() {}" << std::endl;
2944  fout << std::endl;
2945  fout << " // return classifier response" << std::endl;
2946  fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
2947  fout << std::endl;
2948  fout << " // returns classifier status" << std::endl;
2949  fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
2950  fout << std::endl;
2951  fout << " protected:" << std::endl;
2952  fout << std::endl;
2953  fout << " bool fStatusIsClean;" << std::endl;
2954  fout << "};" << std::endl;
2955  fout << std::endl;
2956  fout << "#endif" << std::endl;
2957  fout << std::endl;
2958  fout << "class " << className << " : public IClassifierReader {" << std::endl;
2959  fout << std::endl;
2960  fout << " public:" << std::endl;
2961  fout << std::endl;
2962  fout << " // constructor" << std::endl;
2963  fout << " " << className << "( std::vector<std::string>& theInputVars ) " << std::endl;
2964  fout << " : IClassifierReader()," << std::endl;
2965  fout << " fClassName( \"" << className << "\" )," << std::endl;
2966  fout << " fNvars( " << GetNvar() << " )," << std::endl;
2967  fout << " fIsNormalised( " << (IsNormalised() ? "true" : "false") << " )" << std::endl;
2968  fout << " { " << std::endl;
2969  fout << " // the training input variables" << std::endl;
2970  fout << " const char* inputVars[] = { ";
2971  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
2972  fout << "\"" << GetOriginalVarName(ivar) << "\"";
2973  if (ivar<GetNvar()-1) fout << ", ";
2974  }
2975  fout << " };" << std::endl;
2976  fout << std::endl;
2977  fout << " // sanity checks" << std::endl;
2978  fout << " if (theInputVars.size() <= 0) {" << std::endl;
2979  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
2980  fout << " fStatusIsClean = false;" << std::endl;
2981  fout << " }" << std::endl;
2982  fout << std::endl;
2983  fout << " if (theInputVars.size() != fNvars) {" << std::endl;
2984  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
2985  fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
2986  fout << " fStatusIsClean = false;" << std::endl;
2987  fout << " }" << std::endl;
2988  fout << std::endl;
2989  fout << " // validate input variables" << std::endl;
2990  fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
2991  fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
2992  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
2993  fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
2994  fout << " fStatusIsClean = false;" << std::endl;
2995  fout << " }" << std::endl;
2996  fout << " }" << std::endl;
2997  fout << std::endl;
2998  fout << " // initialize min and max vectors (for normalisation)" << std::endl;
2999  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3000  fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
3001  fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
3002  }
3003  fout << std::endl;
3004  fout << " // initialize input variable types" << std::endl;
3005  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3006  fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3007  }
3008  fout << std::endl;
3009  fout << " // initialize constants" << std::endl;
3010  fout << " Initialize();" << std::endl;
3011  fout << std::endl;
3012  if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3013  fout << " // initialize transformation" << std::endl;
3014  fout << " InitTransform();" << std::endl;
3015  }
3016  fout << " }" << std::endl;
3017  fout << std::endl;
3018  fout << " // destructor" << std::endl;
3019  fout << " virtual ~" << className << "() {" << std::endl;
3020  fout << " Clear(); // method-specific" << std::endl;
3021  fout << " }" << std::endl;
3022  fout << std::endl;
3023  fout << " // the classifier response" << std::endl;
3024  fout << " // \"inputValues\" is a vector of input values in the same order as the " << std::endl;
3025  fout << " // variables given to the constructor" << std::endl;
3026  fout << " double GetMvaValue( const std::vector<double>& inputValues ) const;" << std::endl;
3027  fout << std::endl;
3028  fout << " private:" << std::endl;
3029  fout << std::endl;
3030  fout << " // method-specific destructor" << std::endl;
3031  fout << " void Clear();" << std::endl;
3032  fout << std::endl;
3033  if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3034  fout << " // input variable transformation" << std::endl;
3035  GetTransformationHandler().MakeFunction(fout, className,1);
3036  fout << " void InitTransform();" << std::endl;
3037  fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3038  fout << std::endl;
3039  }
3040  fout << " // common member variables" << std::endl;
3041  fout << " const char* fClassName;" << std::endl;
3042  fout << std::endl;
3043  fout << " const size_t fNvars;" << std::endl;
3044  fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
3045  fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3046  fout << std::endl;
3047  fout << " // normalisation of input variables" << std::endl;
3048  fout << " const bool fIsNormalised;" << std::endl;
3049  fout << " bool IsNormalised() const { return fIsNormalised; }" << std::endl;
3050  fout << " double fVmin[" << GetNvar() << "];" << std::endl;
3051  fout << " double fVmax[" << GetNvar() << "];" << std::endl;
3052  fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3053  fout << " // normalise to output range: [-1, 1]" << std::endl;
3054  fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3055  fout << " }" << std::endl;
3056  fout << std::endl;
3057  fout << " // type of input variable: 'F' or 'I'" << std::endl;
3058  fout << " char fType[" << GetNvar() << "];" << std::endl;
3059  fout << std::endl;
3060  fout << " // initialize internal variables" << std::endl;
3061  fout << " void Initialize();" << std::endl;
3062  fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3063  fout << "" << std::endl;
3064  fout << " // private members (method specific)" << std::endl;
3065 
3066  // call the classifier specific output (the classifier must close the class !)
3067  MakeClassSpecific( fout, className );
3068 
3069  fout << " inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3070  fout << " {" << std::endl;
3071  fout << " // classifier response value" << std::endl;
3072  fout << " double retval = 0;" << std::endl;
3073  fout << std::endl;
3074  fout << " // classifier response, sanity check first" << std::endl;
3075  fout << " if (!IsStatusClean()) {" << std::endl;
3076  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3077  fout << " << \" because status is dirty\" << std::endl;" << std::endl;
3078  fout << " retval = 0;" << std::endl;
3079  fout << " }" << std::endl;
3080  fout << " else {" << std::endl;
3081  fout << " if (IsNormalised()) {" << std::endl;
3082  fout << " // normalise variables" << std::endl;
3083  fout << " std::vector<double> iV;" << std::endl;
3084  fout << " iV.reserve(inputValues.size());" << std::endl;
3085  fout << " int ivar = 0;" << std::endl;
3086  fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3087  fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3088  fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3089  fout << " }" << std::endl;
3090  if (GetTransformationHandler().GetTransformationList().GetSize()!=0 &&
3093  fout << " Transform( iV, -1 );" << std::endl;
3094  }
3095  fout << " retval = GetMvaValue__( iV );" << std::endl;
3096  fout << " }" << std::endl;
3097  fout << " else {" << std::endl;
3098  if (GetTransformationHandler().GetTransformationList().GetSize()!=0 &&
3101  fout << " std::vector<double> iV;" << std::endl;
3102  fout << " int ivar = 0;" << std::endl;
3103  fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3104  fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3105  fout << " iV.push_back(*varIt);" << std::endl;
3106  fout << " }" << std::endl;
3107  fout << " Transform( iV, -1 );" << std::endl;
3108  fout << " retval = GetMvaValue__( iV );" << std::endl;
3109  }
3110  else {
3111  fout << " retval = GetMvaValue__( inputValues );" << std::endl;
3112  }
3113  fout << " }" << std::endl;
3114  fout << " }" << std::endl;
3115  fout << std::endl;
3116  fout << " return retval;" << std::endl;
3117  fout << " }" << std::endl;
3118 
3119  // create output for transformation - if any
3120  if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3121  GetTransformationHandler().MakeFunction(fout, className,2);
3122 
3123  // close the file
3124  fout.close();
3125 }
3126 
3127 ////////////////////////////////////////////////////////////////////////////////
3128 /// prints out the method-specific help message
3129 
3131 {
3132  // if options are written to reference file, also append help info
3133  std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3134  std::ofstream* o = 0;
3135  if (gConfig().WriteOptionsReference()) {
3136  Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3137  o = new std::ofstream( GetReferenceFile(), std::ios::app );
3138  if (!o->good()) { // file could not be opened --> Error
3139  Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3140  }
3141  std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3142  }
3143 
3144  // "|--------------------------------------------------------------|"
3145  if (!o) {
3146  Log() << kINFO << Endl;
3147  Log() << gTools().Color("bold")
3148  << "================================================================"
3149  << gTools().Color( "reset" )
3150  << Endl;
3151  Log() << gTools().Color("bold")
3152  << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3153  << gTools().Color( "reset" )
3154  << Endl;
3155  }
3156  else {
3157  Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3158  }
3159 
3160  // print method-specific help message
3161  GetHelpMessage();
3162 
3163  if (!o) {
3164  Log() << Endl;
3165  Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3166  Log() << gTools().Color("bold")
3167  << "================================================================"
3168  << gTools().Color( "reset" )
3169  << Endl;
3170  Log() << Endl;
3171  }
3172  else {
3173  // indicate END
3174  Log() << "# End of Message___" << Endl;
3175  }
3176 
3177  std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3178  if (o) o->close();
3179 }
3180 
3181 // ----------------------- r o o t f i n d i n g ----------------------------
3182 
3183 ////////////////////////////////////////////////////////////////////////////////
3184 /// returns efficiency as function of cut
3185 
3187 {
3188  Double_t retval=0;
3189 
3190  // retrieve the class object
3191  if (Use_Splines_for_Eff_) {
3192  retval = fSplRefS->Eval( theCut );
3193  }
3194  else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3195 
3196  // caution: here we take some "forbidden" action to hide a problem:
3197  // in some cases, in particular for likelihood, the binned efficiency distributions
3198  // do not equal 1, at xmin, and 0 at xmax; of course, in principle we have the
3199  // unbinned information available in the trees, but the unbinned minimization is
3200  // too slow, and we don't need to do a precision measurement here. Hence, we force
3201  // this property.
3202  Double_t eps = 1.0e-5;
3203  if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3204  else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3205 
3206  return retval;
3207 }
3208 
3209 ////////////////////////////////////////////////////////////////////////////////
3210 /// returns the event collection (i.e. the dataset) TRANSFORMED using the
3211 /// classifiers specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3212 
3214 {
3215  // if there's no variable transformation for this classifier, just hand back the
3216  // event collection of the data set
3217  if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3218  return (Data()->GetEventCollection(type));
3219  }
3220 
3221  // otherwise, transform ALL the events and hand back the vector of the pointers to the
3222  // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3223  // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3224  Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3225  if (fEventCollections.at(idx) == 0) {
3226  fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3228  }
3229  return *(fEventCollections.at(idx));
3230 }
3231 
3232 ////////////////////////////////////////////////////////////////////////////////
3233 /// calculates the TMVA version string from the training version code on the fly
3234 
3236 {
3237  UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3238  UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3239  UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3240 
3241  return TString(Form("%i.%i.%i",a,b,c));
3242 }
3243 
3244 ////////////////////////////////////////////////////////////////////////////////
3245 /// calculates the ROOT version string from the training version code on the fly
3246 
3248 {
3249  UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3250  UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3251  UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3252 
3253  return TString(Form("%i.%02i/%02i",a,b,c));
3254 }
3255 
3256 ////////////////////////////////////////////////////////////////////////////////
3257 
3259  ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
3261 
3262  if (mvaRes != NULL) {
3263  TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3264  TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3265  TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3266  TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3267 
3268  if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3269 
3270  if (SorB == 's' || SorB == 'S')
3271  return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3272  else
3273  return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3274  }
3275  return -1;
3276 }
virtual void DeclareOptions()=0
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:589
Config & gConfig()
Definition: Config.cxx:43
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:830
TString fMethodName
Definition: MethodBase.h:609
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:921
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition: TH1.cxx:3442
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:5893
void AddOptionsXMLTo(void *parent) const
write options to XML file
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3127
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument. This is just a dummy.
Definition: MethodBase.cxx:638
TXMLEngine & xmlengine()
Definition: Tools.h:278
const TString & GetWeightFileDir() const
Definition: MethodBase.h:486
float xmin
Definition: THbookFile.cxx:93
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodBase.h:230
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
#define TMVA_VERSION_CODE
Definition: Version.h:47
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:127
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ReadOptionsFromXML(void *node)
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:957
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4640
long long Long64_t
Definition: RtypesCore.h:69
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis ...
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Definition: MethodBase.cxx:206
Bool_t fIgnoreNegWeightsInTraining
Definition: MethodBase.h:676
virtual const char * WorkingDirectory()
Return working directory.
Definition: TSystem.cxx:866
void ReadStateFromXML(void *parent)
std::vector< VariableInfo > & GetSpectatorInfos()
Definition: DataSetInfo.h:122
Ssiz_t Length() const
Definition: TString.h:390
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
TLine * line
Collectable string class.
Definition: TObjString.h:32
TSpline1 * fSplTrainRefS
Definition: MethodBase.h:699
float Float_t
Definition: RtypesCore.h:53
return c
virtual Int_t GetMaximumBin() const
Return location of bin with maximum value in the range.
Definition: TH1.cxx:7651
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
std::vector< TGraph * > fGraphs
Definition: MethodBase.h:114
Double_t GetXmin() const
Definition: PDF.h:112
const TString & GetExpression() const
Definition: VariableInfo.h:65
void ReadOptionsFromStream(std::istream &istr)
read option back from the weight file
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
const char * GetName() const
Definition: MethodBase.h:330
void CheckForUnusedOptions() const
checks for unused options in option string
void BuildPDF(const TH1 *theHist)
Definition: PDF.cxx:254
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
UInt_t GetNClasses() const
Definition: DataSetInfo.h:154
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:617
TString fWeightFile
Definition: MethodBase.h:632
TString fVariableTransformTypeString
Definition: MethodBase.h:718
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
TransformationHandler * fTransformationPointer
Definition: MethodBase.h:665
Types::ESBType fVariableTransformType
Definition: MethodBase.h:605
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
DataSet * Data() const
Definition: MethodBase.h:405
UInt_t GetNvar() const
Definition: MethodBase.h:340
EAnalysisType
Definition: Types.h:128
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:37
UInt_t TreeIndex(Types::ETreeType type) const
Definition: DataSet.h:204
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum=0)
Compute Quantiles for this histogram Quantile x_q of a probability distribution Function F is defined...
Definition: TH1.cxx:4192
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:822
const TString & GetOriginalVarName(Int_t ivar) const
Definition: MethodBase.h:505
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:186
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance, S/Sqrt(S^2 + B^2), curve for given number of signal and background events; returns...
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodBase.h:224
Bool_t IsNormalised() const
Definition: MethodBase.h:490
Basic string class.
Definition: TString.h:137
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition: TH1.cxx:700
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:575
void SetTrainTime(Double_t trainTime)
Definition: MethodBase.h:165
TMultiGraph * fMultiGraph
Definition: MethodBase.h:111
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition: Tools.cxx:598
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1089
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
int Int_t
Definition: RtypesCore.h:41
virtual void SetYTitle(const char *title)
Definition: TH1.h:414
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory and return a pointer to the created directory.
Definition: TDirectory.cxx:957
bool Bool_t
Definition: RtypesCore.h:59
TArc * a
Definition: textangle.C:12
const Bool_t kFALSE
Definition: Rtypes.h:92
virtual void TestMulticlass()
test multiclass classification
TString fJobName
Definition: MethodBase.h:608
ECutOrientation GetCutOrientation() const
Definition: MethodBase.h:546
TSpline1 * fSplRefB
Definition: MethodBase.h:697
TSpline1 * fSplRefS
Definition: MethodBase.h:696
virtual Int_t GetNbinsX() const
Definition: TH1.h:301
std::vector< TString > * fInputVars
Definition: MethodBase.h:582
STL namespace.
const std::vector< Event * > * CalcTransformations(const std::vector< Event * > &, Bool_t createNewVector=kFALSE)
computation of transformation
#define ROOT_VERSION_CODE
Definition: RVersion.h:21
void ReadTargetsFromXML(void *tarnode)
read target info from XML
virtual void Init()=0
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:378
TString GetElapsedTime(Bool_t Scientific=kTRUE)
Definition: Timer.cxx:129
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:309
Double_t fTrainTime
Definition: MethodBase.h:689
UInt_t GetNVariables() const
Definition: DataSetInfo.h:128
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:558
Double_t GetTrainingSumSignalWeights()
void WriteOptionsToStream(std::ostream &o, const TString &prefix) const
write options to output stream (e.g. in writing the MVA weight files)
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Double_t fTestTime
Definition: MethodBase.h:690
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:311
MsgLogger * fLogger
Definition: Configurable.h:134
const TString & GetReferenceFile() const
Definition: Configurable.h:108
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:837
const TString & GetMethodName() const
Definition: MethodBase.h:327
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1220
TString GetWeightFileName() const
retrieve weight file name
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:233
void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part) const
create transformation function
void ReadVariablesFromXML(void *varnode)
read variable info from XML
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3871
UInt_t fSignalClass
Definition: MethodBase.h:683
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:85
const char * Data() const
Definition: TString.h:349
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:433
double sqrt(double)
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:388
Tools & gTools()
Definition: Tools.cxx:79
TStopwatch timer
Definition: pirndm.C:37
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:601
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2335
ECutOrientation fCutOrientation
Definition: MethodBase.h:693
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables, variable transformation type etc.
virtual ~MethodBase()
destructor
Definition: MethodBase.cxx:361
Bool_t IsSignal(const Event *ev) const
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
Definition: MethodBase.cxx:880
UInt_t GetNTargets() const
Definition: MethodBase.h:342
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream ...
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
Definition: MethodBase.cxx:234
void ClearGraphs()
This function sets the point number to 0 for all graphs.
Definition: MethodBase.cxx:194
Bool_t CheckSplines(const TH1 *, const TSpline *)
Definition: Tools.cxx:487
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
void ReadStateFromFile()
Function to read options and weights from file.
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
std::vector< Float_t > * GetValueVector()
bool BeginsWith(const std::string &theString, const std::string &theSubstring)
void SetCallerName(const TString &name)
~IPythonInteractive()
standard destructor
Definition: MethodBase.cxx:158
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:851
Bool_t DoMulticlass() const
Definition: MethodBase.h:435
Double_t fMeanB
Definition: MethodBase.h:656
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
IONames & GetIONames()
Definition: Config.h:78
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:395
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:403
TH1 * GetSmoothedHist() const
Definition: PDF.h:103
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:413
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
Definition: MethodBase.cxx:171
void SetOptions(const TString &s)
Definition: Configurable.h:91
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition: TSystem.cxx:1563
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1965
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:76
TString fTestvar
Definition: MethodBase.h:611
Types::EMVA GetMethodType() const
Definition: MethodBase.h:329
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:113
virtual void AddWeightsXMLTo(void *parent) const =0
virtual void ProcessOptions()=0
virtual Double_t GetProba(const Event *ev)
Definition: PDF.h:71
TH1F * h1
Definition: legend1.C:5
TSpline * fSpleffBvsS
Definition: MethodBase.h:646
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
Definition: TH1.cxx:1195
void ValidatePDF(TH1 *original=0) const
comparison of original histogram with reference PDF
Definition: PDF.cxx:576
Double_t GetXmin() const
Definition: TAxis.h:139
std::vector< VariableInfo > & GetTargetInfos()
Definition: DataSetInfo.h:117
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
Bool_t HasMVAPdfs() const
Definition: MethodBase.h:431
Bool_t fModelPersistence
Definition: MethodBase.h:627
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Definition: RootFinder.cxx:68
UInt_t fTMVATrainingVersion
Definition: MethodBase.h:612
A doubly linked list.
Definition: TList.h:47
TransformationHandler fTransformation
Definition: MethodBase.h:666
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
UInt_t GetNVariables() const
Definition: MethodBase.h:341
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void ReadWeightsFromXML(void *wghtnode)=0
const int nEvents
Definition: testRooFit.cxx:42
Int_t GetHistNBins(Int_t evtNum=0)
Definition: PDF.cxx:298
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file; if layout<=0, no spaces or newlines will be placed between xmlnode...
TString fWeightFileExtension
Definition: Config.h:101
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8208
TString fUser
Definition: TSystem.h:152
virtual void Train()=0
char GetVarType() const
Definition: VariableInfo.h:69
void PlotVariables(const std::vector< Event * > &events, TDirectory *theDirectory=0)
create histograms from the input variables
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6717
Double_t fRmsB
Definition: MethodBase.h:658
Double_t fXmin
Definition: MethodBase.h:659
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
Definition: DataSet.cxx:286
TSpline1 * fSplTrainRefB
Definition: MethodBase.h:700
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
Definition: TString.cxx:2221
TRandom2 r(17)
Class to manage histogram axis.
Definition: TAxis.h:36
R__EXTERN TSystem * gSystem
Definition: TSystem.h:549
virtual const char * GetBuildNode() const
Return the build node name.
Definition: TSystem.cxx:3749
TDirectory * fMethodBaseDir
Definition: MethodBase.h:620
SVector< double, 2 > v
Definition: Dict.h:5
UInt_t fROOTTrainingVersion
Definition: MethodBase.h:613
Bool_t DoesExist(const TString &alias) const
Definition: Results.cxx:118
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream...
TH1 * GetOriginalHist() const
Definition: PDF.h:102
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file; dummy implementation here ...
virtual void ReadTransformationFromStream(std::istream &istr, const TString &classname="")=0
const Int_t NBIN_HIST_HIGH
Definition: MethodBase.cxx:137
2-D histogram with a float per channel (see TH1 documentation)
Definition: TH2.h:255
class TMVA::Config::VariablePlotting fVariablePlotting
ClassInfo * GetClassInfo(Int_t clNum) const
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8280
Float_t GetAchievablePur(UInt_t cls)
void SetReadingVersion(UInt_t rv)
Definition: PDF.h:127
void SetValue(Float_t value, Int_t ievt)
set MVA response
virtual Double_t KolmogorovTest(const TH1 *h2, Option_t *option="") const
Statistical test of compatibility in shape between this histogram and h2, using Kolmogorov test...
Definition: TH1.cxx:7316
Bool_t Help() const
Definition: MethodBase.h:498
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fMeanS
Definition: MethodBase.h:655
const Event * GetEvent() const
Definition: MethodBase.h:745
char * Form(const char *fmt,...)
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
Definition: Tools.cxx:136
void ReadFromXML(void *varnode)
read VariableInfo from stream
Int_t fNsmoothMVAPdf
Definition: MethodBase.h:721
Bool_t fTxtWeightsOnly
Definition: MethodBase.h:719
TDirectory * fBaseDir
Definition: MethodBase.h:619
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
TSubString Strip(EStripType s=kTrailing, char c= ' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1070
virtual Double_t GetSignificance() const
compute significance of mean difference: significance = |<S> - <B>|/Sqrt(RMS_S^2 + RMS_B^2)
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:296
Bool_t fHasMVAPdfs
Definition: MethodBase.h:674
TSpline * fSplTrainEffBvsS
Definition: MethodBase.h:650
float xmax
Definition: THbookFile.cxx:93
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:217
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
Definition: MethodBase.cxx:503
void AddXMLTo(void *parent=0) const
XML node describing the transformation return;.
UInt_t GetTrainingROOTVersionCode() const
Definition: MethodBase.h:386
void BuildTransformationFromVarInfo(const std::vector< TMVA::VariableInfo > &var)
this method is only used when building a normalization transformation from old text files in this cas...
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:618
Bool_t IsSilentFile()
Definition: MethodBase.h:375
Double_t GetTrainTime() const
Definition: MethodBase.h:166
REAL epsilon
Definition: triangle.c:617
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodBase.cxx:534
Double_t ElapsedSeconds(void)
computes elapsed time in seconds
Definition: Timer.cxx:122
std::vector< const std::vector< TMVA::Event * > * > fEventCollections
Definition: MethodBase.h:702
TString fVerbosityLevelString
Definition: MethodBase.h:671
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
Double_t fRmsS
Definition: MethodBase.h:657
UInt_t fBackgroundClass
Definition: MethodBase.h:684
const Event * GetEvent() const
Definition: DataSet.cxx:211
void AddTargetsXMLTo(void *parent) const
write target info to XML
void DeclareOptions()
define the options (their key words) that can be set in the option string know options: PDFInterpol[i...
Definition: PDF.cxx:816
void AddVarsXMLTo(void *parent) const
write variable info to XML
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:114
TList * GetStorage() const
Definition: Results.h:79
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:397
int Ssiz_t
Definition: RtypesCore.h:63
TFile * GetFile() const
Definition: MethodBase.h:366
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
virtual void MakeClassSpecific(std::ostream &, const TString &="") const
Definition: MethodBase.h:514
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual Int_t GetSize() const
Definition: TCollection.h:95
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:430
#define ClassImp(name)
Definition: Rtypes.h:279
void SetTestTime(Double_t testTime)
Definition: MethodBase.h:169
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:738
virtual void GetHelpMessage() const =0
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition: PDF.cxx:699
double Double_t
Definition: RtypesCore.h:55
std::vector< Double_t > GetBestMultiClassCuts(UInt_t targetClass)
void SetWeightFileName(TString)
set the weight file name (deprecated)
Describe directory structure in memory.
Definition: TDirectory.h:44
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:592
Double_t GetTrainingSumBackgrWeights()
int type
Definition: TGX11.cxx:120
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
Double_t GetXmax() const
Definition: TAxis.h:140
static RooMathCoreReg dummy
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:229
MsgLogger & Log() const
Definition: Configurable.h:128
The TH1 histogram class.
Definition: TH1.h:80
const Bool_t Use_Splines_for_Eff_
Definition: MethodBase.cxx:134
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:114
void AddPreDefVal(const T &)
Definition: Configurable.h:174
IPythonInteractive()
standard constructor
Definition: MethodBase.cxx:150
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
Definition: MethodBase.cxx:718
ClassInfo * AddClass(const TString &className)
void AddXMLTo(void *parent)
XML file writing.
Definition: PDF.cxx:915
Double_t GetSignalReferenceCutOrientation() const
Definition: MethodBase.h:357
virtual Double_t Eval(Double_t x) const
returns linearly interpolated TGraph entry around x
Definition: TSpline1.cxx:61
Bool_t fConstructedFromWeightFile
Definition: MethodBase.h:614
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:420
TString fVarTransformString
Definition: MethodBase.h:663
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:777
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
void SetConfigName(const char *n)
Definition: Configurable.h:69
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:104
void * GetExternalLink() const
Definition: VariableInfo.h:89
Float_t GetAchievableEff(UInt_t cls)
void SetSource(const std::string &source)
Definition: MsgLogger.h:74
Types::EMVA fMethodType
Definition: MethodBase.h:610
char Char_t
Definition: RtypesCore.h:29
virtual Double_t Eval(Double_t x) const =0
Bool_t DoRegression() const
Definition: MethodBase.h:434
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
virtual void MakeClassSpecificHeader(std::ostream &, const TString &="") const
Definition: MethodBase.h:517
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
Ranking * fRanking
Definition: MethodBase.h:581
virtual void SetXTitle(const char *title)
Definition: TH1.h:413
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
Definition: MethodBase.cxx:963
TH1 * GetPDFHist() const
Definition: PDF.h:100
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:435
const TString & GetJobName() const
Definition: MethodBase.h:326
void ReadFromStream(std::istream &istr)
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:353
void PrintHelpMessage() const
prints out method-specific help method
UInt_t GetTrainingTMVAVersionCode() const
Definition: MethodBase.h:385
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:590
void ProcessOptions()
Definition: PDF.cxx:866
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
const TString & GetOptions() const
Definition: Configurable.h:90
void AddToXML(void *varnode)
write class to XML
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
Double_t fSignalReferenceCut
the data set information (sometimes needed)
Definition: MethodBase.h:603
const Event * fTmpEvent
Definition: MethodBase.h:408
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored ...
void SetWeightFileDir(TString fileDir)
set directory of weight file
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
UInt_t GetNSpectators(bool all=kTRUE) const
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
Definition: TH1.cxx:8087
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:53
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
Definition: TDirectory.cxx:338
Int_t FindVarIndex(const TString &) const
find variable by name
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
#define NULL
Definition: Rtypes.h:82
Int_t fNbinsMVAoutput
Definition: MethodBase.h:586
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:301
virtual Double_t GetTrainingEfficiency(const TString &)
Bool_t fSilentFile
Definition: MethodBase.h:625
Double_t GetIntegral(Double_t xmin, Double_t xmax)
computes PDF integral within given ranges
Definition: PDF.cxx:652
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:239
Double_t fXmax
Definition: MethodBase.h:660
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
std::vector< Bool_t > * GetValueVectorTypes()
void InitBase()
default initialization called by all constructors
Definition: MethodBase.cxx:438
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:591
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:93
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:356
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:83
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity: R(x) = Integrate_[-oo..x] { PDF(x') dx' } where PDF(x) is the PDF of the classifier's...
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
TString GetMethodTypeName() const
Definition: MethodBase.h:328
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:582
virtual void ReadWeightsFromStream(std::istream &)=0
const char * AsString() const
Return the date & time as a string (ctime() format).
Definition: TDatime.cxx:101
virtual Double_t GetMaximum(Double_t maxval=FLT_MAX) const
Return maximum value smaller than maxval of bins in the range, unless the value has been overridden b...
Definition: TH1.cxx:7621
const Bool_t kTRUE
Definition: Rtypes.h:91
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:308
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:337
TString fFileDir
Definition: MethodBase.h:631
UInt_t GetNumber() const
Definition: ClassInfo.h:73
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables, variable transformation type etc.
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:215
Double_t GetXmax() const
Definition: PDF.h:113
virtual void TestClassification()
initialization
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write all objects in this collection.
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:112
void SetExternalLink(void *p)
Definition: VariableInfo.h:81
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:432
char name[80]
Definition: TGX11.cxx:109
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:467
Bool_t fSetupCompleted
Definition: MethodBase.h:705
TAxis * GetXaxis()
Definition: TH1.h:324
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:819
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:360
void Resize(Ssiz_t n)
Resize the string. Truncate or add blanks as necessary.
Definition: TString.cxx:1059
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130...
Definition: TDatime.h:39
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)
void SetConfigDescription(const char *d)
Definition: Configurable.h:70
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:352
virtual void Close(Option_t *option="")
Close a file.
Definition: TFile.cxx:904
const TString & GetTestvarName() const
Definition: MethodBase.h:331