Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodBase.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodBase *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21 * *
22 * Copyright (c) 2005-2011: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * U. of Bonn, Germany *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (see tmva/doc/LICENSE) *
31 * *
32 **********************************************************************************/
33
34/*! \class TMVA::MethodBase
35\ingroup TMVA
36
37 Virtual base class for all MVA methods
38
39 MethodBase hosts several specific evaluation methods.
40
41 The kind of MVA that provides optimal performance in an analysis strongly
42 depends on the particular application. The evaluation factory provides a
43 number of numerical benchmark results to directly assess the performance
44 of the MVA training on the independent test sample. These are:
45
46 - The _signal efficiency_ at three representative background efficiencies
47 (which is 1 &minus; rejection).
48 - The _significance_ of an MVA estimator, defined by the difference
49 between the MVA mean values for signal and background, divided by the
50 quadratic sum of their root mean squares.
51 - The _separation_ of an MVA _x_, defined by the integral
52 \f[
53 \frac{1}{2} \int \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx
54 \f]
55 where
56 \f$ S(x) \f$ and \f$ B(x) \f$ are the signal and background distributions,
57 respectively. The separation is zero for identical signal and background MVA
58 shapes, and it is one for disjunctive shapes.
59 - The average, \f$ \int x \mu (S(x)) dx \f$, of the signal \f$ \mu_{transform} \f$.
60 The \f$ \mu_{transform} \f$ of an MVA denotes the transformation that yields
61 a uniform background distribution. In this way, the signal distributions
62 \f$ S(x) \f$ can be directly compared among the various MVAs. The stronger
63 \f$ S(x) \f$ peaks towards one, the better is the discrimination of the MVA.
64 The \f$ \mu_{transform} \f$ is
65 [documented here](http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html).
66
67 The MVA standard output also prints the linear correlation coefficients between
68 signal and background, which can be useful to eliminate variables that exhibit too
69 strong correlations.
70*/
71
72#include "TMVA/MethodBase.h"
73
74#include "TMVA/Config.h"
75#include "TMVA/Configurable.h"
76#include "TMVA/DataSetInfo.h"
77#include "TMVA/DataSet.h"
78#include "TMVA/Factory.h"
79#include "TMVA/IMethod.h"
80#include "TMVA/MsgLogger.h"
81#include "TMVA/PDF.h"
82#include "TMVA/Ranking.h"
83#include "TMVA/DataLoader.h"
84#include "TMVA/Tools.h"
85#include "TMVA/Results.h"
89#include "TMVA/RootFinder.h"
90#include "TMVA/Timer.h"
91#include "TMVA/TSpline1.h"
92#include "TMVA/Types.h"
96#include "TMVA/VariableInfo.h"
100#include "TMVA/Version.h"
101
102#include "TROOT.h"
103#include "TSystem.h"
104#include "TObjString.h"
105#include "TQObject.h"
106#include "TSpline.h"
107#include "TMatrix.h"
108#include "TMath.h"
109#include "TH1F.h"
110#include "TH2F.h"
111#include "TFile.h"
112#include "TGraph.h"
113#include "TXMLEngine.h"
114
115#include <iomanip>
116#include <iostream>
117#include <fstream>
118#include <sstream>
119#include <cstdlib>
120#include <algorithm>
121#include <limits>
122
123
124
125using std::endl;
126using std::atof;
127
128//const Int_t MethodBase_MaxIterations_ = 200;
130
131//const Int_t NBIN_HIST_PLOT = 100;
132const Int_t NBIN_HIST_HIGH = 10000;
133
134#ifdef _WIN32
135/* Disable warning C4355: 'this' : used in base member initializer list */
136#pragma warning ( disable : 4355 )
137#endif
138
139
140#include "TMultiGraph.h"
141
142////////////////////////////////////////////////////////////////////////////////
143/// standard constructor
144
146{
// Start with an empty interactive monitor: the graphs are created later in
// Init(), and fIndex tracks the next point index used by AddPoint().
// NOTE(review): fMultiGraph is presumably initialised in the constructor's
// initializer list, which is not shown in this listing -- confirm.
147 fNumGraphs = 0;
148 fIndex = 0;
149}
150
151////////////////////////////////////////////////////////////////////////////////
152/// standard destructor
154{
// Release the multi-graph container. NOTE(review): the TGraphs added in
// Init() are presumably owned and deleted by the TMultiGraph -- confirm
// ROOT's TMultiGraph ownership semantics.
155 if (fMultiGraph){
156 delete fMultiGraph;
157 fMultiGraph = nullptr;
158 }
159 return;
160}
161
162////////////////////////////////////////////////////////////////////////////////
163/// This function gets some title and it creates a TGraph for every title.
164/// It also sets up the style for every TGraph. All graphs are added to a single TMultiGraph.
165///
166/// \param[in] graphTitles vector of titles
167
169{
// Refuse a second initialisation: the graphs would otherwise be added twice.
170 if (fNumGraphs!=0){
171 std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
172 return;
173 }
// Create one styled TGraph per title and register it with the TMultiGraph;
// the color index advances by two so consecutive graphs are distinguishable.
174 Int_t color = 2;
175 for(auto& title : graphTitles){
176 fGraphs.push_back( new TGraph() );
177 fGraphs.back()->SetTitle(title);
178 fGraphs.back()->SetName(title);
179 fGraphs.back()->SetFillColor(color);
180 fGraphs.back()->SetLineColor(color);
181 fGraphs.back()->SetMarkerColor(color);
182 fMultiGraph->Add(fGraphs.back());
183 color += 2;
184 fNumGraphs += 1;
185 }
186 return;
187}
188
189////////////////////////////////////////////////////////////////////////////////
190/// This function sets the point number to 0 for all graphs.
191
193{
// Drop all points from every graph (TGraph::Set(0)); the graph objects
// themselves are kept.
// NOTE(review): fIndex is not reset here, so subsequent AddPoint() calls
// continue from the old point index -- confirm this is intended.
194 for(Int_t i=0; i<fNumGraphs; i++){
195 fGraphs[i]->Set(0);
196 }
197}
198
199////////////////////////////////////////////////////////////////////////////////
200/// This function is used only in 2 TGraph case, and it will add new data points to graphs.
201///
202/// \param[in] x the x coordinate
203/// \param[in] y1 the y coordinate for the first TGraph
204/// \param[in] y2 the y coordinate for the second TGraph
205
207{
// Grow both graphs by one point and store (x,y1) / (x,y2). Per the doc
// comment above, this overload assumes exactly two graphs exist.
208 fGraphs[0]->Set(fIndex+1);
209 fGraphs[1]->Set(fIndex+1);
210 fGraphs[0]->SetPoint(fIndex, x, y1);
211 fGraphs[1]->SetPoint(fIndex, x, y2);
212 fIndex++;
213 return;
214}
215
216////////////////////////////////////////////////////////////////////////////////
217/// This function can add data points to as many TGraphs as we have.
218///
219/// \param[in] dat vector of data points. The dat[0] contains the x coordinate,
220/// dat[1] contains the y coordinate for first TGraph, dat[2] for second, ...
221
222void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
223{
224 for(Int_t i=0; i<fNumGraphs;i++){
225 fGraphs[i]->Set(fIndex+1);
226 fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
227 }
228 fIndex++;
229 return;
230}
231
232
233////////////////////////////////////////////////////////////////////////////////
234/// standard constructor
235
238 const TString& methodTitle,
240 const TString& theOption) :
241 IMethod(),
// Lazily-created objects (PDFs, splines, ranking, return-value buffers)
// start as null pointers; they are allocated on demand and deleted in the
// destructor.
243 fTmpEvent ( 0 ),
244 fRanking ( 0 ),
245 fInputVars ( 0 ),
246 fAnalysisType ( Types::kNoAnalysisType ),
247 fRegressionReturnVal ( 0 ),
248 fMulticlassReturnVal ( 0 ),
249 fDataSetInfo ( dsi ),
// Default working point: MVA output above 0.5 with orientation +1 counts
// as signal (see IsSignalLike()).
250 fSignalReferenceCut ( 0.5 ),
251 fSignalReferenceCutOrientation( 1. ),
252 fVariableTransformType ( Types::kSignal ),
253 fJobName ( jobName ),
254 fMethodName ( methodTitle ),
255 fMethodType ( methodType ),
256 fTestvar ( "" ),
// Record the TMVA/ROOT version codes this training was performed with.
257 fTMVATrainingVersion ( TMVA_VERSION_CODE ),
258 fROOTTrainingVersion ( ROOT_VERSION_CODE ),
259 fConstructedFromWeightFile ( kFALSE ),
260 fBaseDir ( 0 ),
261 fMethodBaseDir ( 0 ),
262 fFile ( 0 ),
263 fSilentFile (kFALSE),
264 fModelPersistence (kTRUE),
265 fWeightFile ( "" ),
266 fEffS ( 0 ),
267 fDefaultPDF ( 0 ),
268 fMVAPdfS ( 0 ),
269 fMVAPdfB ( 0 ),
270 fSplS ( 0 ),
271 fSplB ( 0 ),
272 fSpleffBvsS ( 0 ),
273 fSplTrainS ( 0 ),
274 fSplTrainB ( 0 ),
275 fSplTrainEffBvsS ( 0 ),
276 fVarTransformString ( "None" ),
277 fTransformationPointer ( 0 ),
278 fTransformation ( dsi, methodTitle ),
279 fVerbose ( kFALSE ),
280 fVerbosityLevelString ( "Default" ),
281 fHelp ( kFALSE ),
282 fHasMVAPdfs ( kFALSE ),
283 fIgnoreNegWeightsInTraining( kFALSE ),
284 fSignalClass ( 0 ),
285 fBackgroundClass ( 0 ),
286 fSplRefS ( 0 ),
287 fSplRefB ( 0 ),
288 fSplTrainRefS ( 0 ),
289 fSplTrainRefB ( 0 ),
290 fSetupCompleted (kFALSE)
291{
// Full setup (option declaration/parsing) happens later in SetupMethod().
294
295// // default extension for weight files
296}
297
298////////////////////////////////////////////////////////////////////////////////
299/// constructor used for Testing + Application of the MVA,
300/// only (no training), using given WeightFiles
301
304 const TString& weightFile ) :
305 IMethod(),
306 Configurable(""),
// Reader-mode constructor: training metadata stays empty/zero and will be
// restored from the given weight file (fConstructedFromWeightFile = kTRUE).
307 fTmpEvent ( 0 ),
308 fRanking ( 0 ),
309 fInputVars ( 0 ),
310 fAnalysisType ( Types::kNoAnalysisType ),
311 fRegressionReturnVal ( 0 ),
312 fMulticlassReturnVal ( 0 ),
313 fDataSetInfo ( dsi ),
314 fSignalReferenceCut ( 0.5 ),
// NOTE(review): unlike the training constructor, this initializer list does
// not set fSignalReferenceCutOrientation -- confirm it is restored from the
// weight file before IsSignalLike() is used.
315 fVariableTransformType ( Types::kSignal ),
316 fJobName ( "" ),
317 fMethodName ( "MethodBase" ),
318 fMethodType ( methodType ),
319 fTestvar ( "" ),
320 fTMVATrainingVersion ( 0 ),
321 fROOTTrainingVersion ( 0 ),
322 fConstructedFromWeightFile ( kTRUE ),
323 fBaseDir ( 0 ),
324 fMethodBaseDir ( 0 ),
325 fFile ( 0 ),
326 fSilentFile (kFALSE),
327 fModelPersistence (kTRUE),
328 fWeightFile ( weightFile ),
329 fEffS ( 0 ),
330 fDefaultPDF ( 0 ),
331 fMVAPdfS ( 0 ),
332 fMVAPdfB ( 0 ),
333 fSplS ( 0 ),
334 fSplB ( 0 ),
335 fSpleffBvsS ( 0 ),
336 fSplTrainS ( 0 ),
337 fSplTrainB ( 0 ),
338 fSplTrainEffBvsS ( 0 ),
339 fVarTransformString ( "None" ),
340 fTransformationPointer ( 0 ),
341 fTransformation ( dsi, "" ),
342 fVerbose ( kFALSE ),
343 fVerbosityLevelString ( "Default" ),
344 fHelp ( kFALSE ),
345 fHasMVAPdfs ( kFALSE ),
346 fIgnoreNegWeightsInTraining( kFALSE ),
347 fSignalClass ( 0 ),
348 fBackgroundClass ( 0 ),
349 fSplRefS ( 0 ),
350 fSplRefB ( 0 ),
351 fSplTrainRefS ( 0 ),
352 fSplTrainRefB ( 0 ),
353 fSetupCompleted (kFALSE)
354{
356// // constructor used for Testing + Application of the MVA,
357// // only (no training), using given WeightFiles
358}
359
360////////////////////////////////////////////////////////////////////////////////
361/// destructor
362
364{
365 // destructor
366 if (!fSetupCompleted) Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
367
368 // input-variable list and ranking are owned by this object
369 if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
370 if (fRanking != 0) delete fRanking;
371
372 // PDFs
373 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
374 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
375 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
376
377 // Splines
378 if (fSplS) { delete fSplS; fSplS = 0; }
379 if (fSplB) { delete fSplB; fSplB = 0; }
380 if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
381 if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
382 if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
383 if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
384 if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
385 if (fSplTrainEffBvsS) { delete fSplTrainEffBvsS; fSplTrainEffBvsS = 0; }
386
// Delete the buffered events and their containers (one collection per type).
387 for (size_t i = 0; i < fEventCollections.size(); i++ ) {
388 if (fEventCollections.at(i)) {
389 for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
390 it != fEventCollections.at(i)->end(); ++it) {
391 delete (*it);
392 }
393 delete fEventCollections.at(i);
394 fEventCollections.at(i) = nullptr;
395 }
396 }
397
398 if (fRegressionReturnVal) delete fRegressionReturnVal;
399 if (fMulticlassReturnVal) delete fMulticlassReturnVal;
400}
401
402////////////////////////////////////////////////////////////////////////////////
403/// setup of methods
404
406{
407 // setup of methods
408
// Guard: SetupMethod() must run exactly once per method instance.
409 if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
410 InitBase();
411 DeclareBaseOptions();
// Init() and DeclareOptions() are the method-specific hooks supplied by the
// derived classifier.
412 Init();
413 DeclareOptions();
414 fSetupCompleted = kTRUE;
415}
416
417////////////////////////////////////////////////////////////////////////////////
418/// process all options
419/// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
420/// (sometimes, eg, fitters are used which can only be implemented during training phase)
421
423{
// Parse the base-class options first, then the method-specific ones.
424 ProcessBaseOptions();
425 ProcessOptions();
426}
427
428////////////////////////////////////////////////////////////////////////////////
429/// check may be overridden by derived class
430/// (sometimes, eg, fitters are used which can only be implemented during training phase)
431
433{
// Flag entries of the option string that no Declare*Options() call consumed.
434 CheckForUnusedOptions();
435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// default initialization called by all constructors
439
441{
442 SetConfigDescription( "Configuration options for classifier architecture and tuning" );
443
// Histogram binning is taken from the global TMVA configuration.
444 fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
445 fNbinsMVAoutput = gConfig().fVariablePlotting.fNbinsMVAoutput;
446 fNbinsH = NBIN_HIST_HIGH;
447
448 fSplTrainS = 0;
449 fSplTrainB = 0;
450 fSplTrainEffBvsS = 0;
// -1 marks "not yet computed" for the cached moments.
451 fMeanS = -1;
452 fMeanB = -1;
453 fRmsS = -1;
454 fRmsB = -1;
// Extrema start inverted so the first real value updates both bounds.
455 fXmin = DBL_MAX;
456 fXmax = -DBL_MAX;
457 fTxtWeightsOnly = kTRUE;
458 fSplRefS = 0;
459 fSplRefB = 0;
460
461 fTrainTime = -1.;
462 fTestTime = -1.;
463
464 fRanking = 0;
465
466 // temporary until the move to DataSet is complete
467 fInputVars = new std::vector<TString>;
468 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
469 fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
470 }
471 fRegressionReturnVal = 0;
472 fMulticlassReturnVal = 0;
473
// Two event-collection slots (training / testing), filled lazily.
474 fEventCollections.resize( 2 );
475 fEventCollections.at(0) = 0;
476 fEventCollections.at(1) = 0;
477
478 // retrieve signal and background class index
479 if (DataInfo().GetClassInfo("Signal") != 0) {
480 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
481 }
482 if (DataInfo().GetClassInfo("Background") != 0) {
483 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
484 }
485
486 SetConfigDescription( "Configuration options for MVA method" );
487 SetConfigName( TString("Method") + GetMethodTypeName() );
488}
489
490////////////////////////////////////////////////////////////////////////////////
491/// define the options (their key words) that can be set in the option string
492/// here the options valid for ALL MVA methods are declared.
493///
494/// known options:
495///
496/// - VariableTransform=None,Decorrelated,PCA to use transformed variables
497/// instead of the original ones
498/// - VariableTransformType=Signal,Background which decorrelation matrix to use
499/// in the method. Only the Likelihood
500/// Method can make proper use of independent
501/// transformations of signal and background
502/// - fNbinsMVAPdf = 50 Number of bins used to create a PDF of MVA
503/// - fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
504/// - fHasMVAPdfs create PDFs for the MVA outputs
505/// - V for Verbose output (!V) for non verbose
506/// - H for Help message
507
509{
// Register the options shared by every MVA method; predefined values
// restrict what the user may pass for VerbosityLevel.
510 DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
511
512 DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
513 AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
514 AddPreDefVal( TString("Debug") );
515 AddPreDefVal( TString("Verbose") );
516 AddPreDefVal( TString("Info") );
517 AddPreDefVal( TString("Warning") );
518 AddPreDefVal( TString("Error") );
519 AddPreDefVal( TString("Fatal") );
520
521 // If True (default): write all training results (weights) as text files only;
522 // if False: write also in ROOT format (not available for all methods - will abort if not
523 fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
524 fNormalise = kFALSE; // OBSOLETE !!!
525
526 DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
527
528 DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
529
530 DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
531
532 DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
533 "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
534}
535
536////////////////////////////////////////////////////////////////////////////////
537/// the option string is decoded, for available options see "DeclareOptions"
538
540{
541 if (HasMVAPdfs()) {
542 // setting the default bin num... maybe should be static ? ==> Please no static (JS)
543 // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
544 // reading every PDF's definition and passing the option string to the next one to be read and marked
// Three PDFs are built in a chain (default -> background -> signal); each
// receives the option string of its predecessor so all consumed options
// end up marked exactly once.
545 fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
546 fDefaultPDF->DeclareOptions();
547 fDefaultPDF->ParseOptions();
548 fDefaultPDF->ProcessOptions();
549 fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
550 fMVAPdfB->DeclareOptions();
551 fMVAPdfB->ParseOptions();
552 fMVAPdfB->ProcessOptions();
553 fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
554 fMVAPdfS->DeclareOptions();
555 fMVAPdfS->ParseOptions();
556 fMVAPdfS->ProcessOptions();
557
558 // the final marked option string is written back to the original methodbase
559 SetOptions( fMVAPdfS->GetOptions() );
560 }
561
562 TMVA::CreateVariableTransforms( fVarTransformString,
563 DataInfo(),
564 GetTransformationHandler(),
565 Log() );
566
// If MVA PDFs were not requested, drop any previously created PDF objects.
567 if (!HasMVAPdfs()) {
568 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
569 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
570 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
571 }
572
// Map the verbosity option onto the logger's minimum message level.
573 if (fVerbose) { // overwrites other settings
574 fVerbosityLevelString = TString("Verbose");
575 Log().SetMinType( kVERBOSE );
576 }
577 else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
578 else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
579 else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
580 else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
581 else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
582 else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
583 else if (fVerbosityLevelString != "Default" ) {
584 Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
585 << fVerbosityLevelString << "' unknown." << Endl;
586 }
587 Event::SetIgnoreNegWeightsInTraining(fIgnoreNegWeightsInTraining);
588}
589
590////////////////////////////////////////////////////////////////////////////////
591/// options that are used ONLY for the READER to ensure backward compatibility
592/// they are hence without any effect (the reader is only reading the training
593/// options that HAD been used at the training of the .xml weight file at hand
594
596{
// These options exist only so old weight files can still be parsed by the
// Reader; their values have no effect (see function doc above).
597 DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
598 DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
599 DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
600 "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
601 AddPreDefVal( TString("Signal") );
602 AddPreDefVal( TString("Background") );
603 DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
604 // Why on earth ?? was this here? Was the verbosity level option meant to 'disappear? Not a good idea i think..
605 // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
606 // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
607 // AddPreDefVal( TString("Debug") );
608 // AddPreDefVal( TString("Verbose") );
609 // AddPreDefVal( TString("Info") );
610 // AddPreDefVal( TString("Warning") );
611 // AddPreDefVal( TString("Error") );
612 // AddPreDefVal( TString("Fatal") );
613 DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
614 DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
615}
616
617
618////////////////////////////////////////////////////////////////////////////////
619/// call the Optimizer with the set of parameters and ranges that
620/// are meant to be tuned.
621
622std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
623{
624 // this is just a dummy... needs to be implemented for each method
625 // individually (as long as we don't have it automatized via the
626 // configuration string
627
628 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
629 << GetName() << Endl;
630 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently we need to set hardcoded which parameter is tuned in which ranges"<<Endl;
631
632 std::map<TString,Double_t> tunedParameters;
633 tunedParameters.size(); // just to get rid of "unused" warning
634 return tunedParameters;
635
636}
637
638////////////////////////////////////////////////////////////////////////////////
639/// set the tuning parameters according to the argument
640/// This is just a dummy .. have a look at the MethodBDT how you could
641/// perhaps implement the same thing for the other Classifiers..
642
643void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
644{
// Intentionally a no-op in the base class: concrete methods (e.g. MethodBDT,
// per the comment above) override this to apply the tuned parameter values.
645}
646
647////////////////////////////////////////////////////////////////////////////////
648
650{
// Driver for a full training pass: prepares the data/transformations, runs
// the derived Train(), then books the training-sample outputs
// (classification / multiclass / regression), optional PDFs, and persists
// the method state and monitoring histograms.
651 Data()->SetCurrentType(Types::kTraining);
652 Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
653
654 // train the MVA method
655 if (Help()) PrintHelpMessage();
656
657 // all histograms should be created in the method's subdirectory
658 if(!IsSilentFile()) BaseDir()->cd();
659
660 // once calculate all the transformation (e.g. the sequence of Decorr:Gauss:Decorr)
661 // needed for this classifier
662 GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());
663
664 // call training of derived MVA
665 Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
666 << "Begin training" << Endl;
667 Long64_t nEvents = Data()->GetNEvents();
668 Timer traintimer( nEvents, GetName(), kTRUE );
669 Train();
670 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
671 << "\tEnd of training " << Endl;
672 SetTrainTime(traintimer.ElapsedSeconds());
673 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
674 << "Elapsed time for training with " << nEvents << " events: "
675 << traintimer.GetElapsedTime() << " " << Endl;
676
677 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
678 << "\tCreate MVA output for ";
679
680 // create PDFs for the signal and background MVA distributions (if required)
681 if (DoMulticlass()) {
682 Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
683 AddMulticlassOutput(Types::kTraining);
684 }
685 else if (!DoRegression()) {
686
687 Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
688 AddClassifierOutput(Types::kTraining);
689 if (HasMVAPdfs()) {
690 CreateMVAPdfs();
691 AddClassifierOutputProb(Types::kTraining);
692 }
693
694 } else {
695
696 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
697 AddRegressionOutput( Types::kTraining );
698
699 if (HasMVAPdfs() ) {
700 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
701 CreateMVAPdfs();
702 }
703 }
704
705 // write the current MVA state into stream
706 // produced are one text file and one ROOT file
707 if (fModelPersistence ) WriteStateToFile();
708
709 // produce standalone make class (presently only supported for classification)
710 if ((!DoRegression()) && (fModelPersistence)) MakeClass();
711
712 // write additional monitoring histograms to main target file (not the weight file)
713 // again, make sure the histograms go into the method's subdirectory
714 if(!IsSilentFile())
715 {
716 BaseDir()->cd();
717 WriteMonitoringHistosToFile();
718 }
719}
720
721////////////////////////////////////////////////////////////////////////////////
722
724{
725 if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
726 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
// NOTE(review): results are always fetched for Types::kTesting although the
// log message above reports 'type' -- confirm whether training-sample
// deviations are really meant to use the testing results.
727 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), Types::kTesting, Types::kRegression);
728 bool truncate = false;
// Untruncated squared deviation: its mean gives the overall RMS deviation.
729 TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
730 stddev = sqrt(h1->GetMean());
731 truncate = true;
// The 90% quantile of the squared deviation defines the truncation cut for
// the "best 90% of events" deviation.
732 Double_t yq[1], xq[]={0.9};
733 h1->GetQuantiles(1,yq,xq);
734 TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
735 stddev90Percent = sqrt(h2->GetMean());
736 delete h1;
737 delete h2;
738}
739
740////////////////////////////////////////////////////////////////////////////////
741/// prepare tree branch with the method's discriminating variable
742
744{
// Evaluate the regression response for every event of the requested tree
// type and store it in the ResultsRegression container.
745 Data()->SetCurrentType(type);
746
747 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
748
749 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), type, Types::kRegression);
750
751 Long64_t nEvents = Data()->GetNEvents();
752
753 // use timer
754 Timer timer( nEvents, GetName(), kTRUE );
755 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
756 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
757
758 regRes->Resize( nEvents );
759
760 // Drawing the progress bar every event was causing a huge slowdown in the evaluation time
761 // So we set some parameters to draw the progress bar a total of totalProgressDraws, i.e. only draw every 1 in 100
762
763 Int_t totalProgressDraws = 100; // total number of times to update the progress bar
764 Int_t drawProgressEvery = 1; // draw every nth event such that we have a total of totalProgressDraws
// NOTE(review): the statement computing drawProgressEvery from
// totalProgressDraws appears to be missing from this listing; as shown the
// bar would be drawn for every event. Confirm against the full source.
766
767 for (Int_t ievt=0; ievt<nEvents; ievt++) {
768
769 Data()->SetCurrentEvent(ievt);
770 std::vector< Float_t > vals = GetRegressionValues();
771 regRes->SetValue( vals, ievt );
772
773 // Only draw the progress bar once in a while, doing this every event causes the evaluation to be ridiculously slow
774 if(ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.DrawProgressBar( ievt );
775 }
776
777 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
778 << "Elapsed time for evaluation of " << nEvents << " events: "
779 << timer.GetElapsedTime() << " " << Endl;
780
781 // store time used for testing
783 SetTestTime(timer.ElapsedSeconds());
784
785 TString histNamePrefix(GetTestvarName());
786 histNamePrefix += (type==Types::kTraining?"train":"test");
787 regRes->CreateDeviationHistograms( histNamePrefix );
788}
789
790////////////////////////////////////////////////////////////////////////////////
791/// prepare tree branch with the method's discriminating variable
792
794{
// Evaluate the multiclass response for every event of the requested tree
// type and store it in the ResultsMulticlass container.
795 Data()->SetCurrentType(type);
796
797 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
798
799 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
800 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
801
802 Long64_t nEvents = Data()->GetNEvents();
803
804 // use timer
805 Timer timer( nEvents, GetName(), kTRUE );
806
807 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
808 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
809
810 resMulticlass->Resize( nEvents );
// Update the progress bar roughly 100 times, not on every event.
811 Int_t modulo = Int_t(nEvents/100) + 1;
812 for (Int_t ievt=0; ievt<nEvents; ievt++) {
813 Data()->SetCurrentEvent(ievt);
814 std::vector< Float_t > vals = GetMulticlassValues();
815 resMulticlass->SetValue( vals, ievt );
816 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
817 }
818
819 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
820 << "Elapsed time for evaluation of " << nEvents << " events: "
821 << timer.GetElapsedTime() << " " << Endl;
822
823 // store time used for testing
825 SetTestTime(timer.ElapsedSeconds());
826
827 TString histNamePrefix(GetTestvarName());
828 histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
829
830 resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
831 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
832}
833
834////////////////////////////////////////////////////////////////////////////////
835
// Default for methods that provide no error estimate on the MVA value:
// flag both (optional) error outputs with -1.
837 if (err) *err=-1;
838 if (errUpper) *errUpper=-1;
839}
840
841////////////////////////////////////////////////////////////////////////////////
842
// Evaluate the MVA on an externally supplied event: temporarily install it
// as fTmpEvent so the parameterless GetMvaValue() overload picks it up,
// then clear the pointer again.
844 fTmpEvent = ev;
845 Double_t val = GetMvaValue(err, errUpper);
846 fTmpEvent = 0;
847 return val;
848}
849
850////////////////////////////////////////////////////////////////////////////////
851/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
852/// for a quick determination if an event would be selected as signal or background
853
// Multiplying both sides by the cut orientation (initialised to +1) lets the
// same comparison handle methods where signal lies below the cut value.
855 return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
856}
857////////////////////////////////////////////////////////////////////////////////
858/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
859/// for a quick determination if an event with this mva output value would be selected as signal or background
860
// Same cut-orientation trick as IsSignalLike(), applied to a caller-supplied
// MVA output value instead of evaluating the current event.
862 return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
863}
864
865////////////////////////////////////////////////////////////////////////////////
866/// prepare tree branch with the method's discriminating variable
867
869{
// Evaluate the classifier on all events of the requested tree type and
// store each MVA value together with the true signal/background flag in
// the ResultsClassification container.
870 Data()->SetCurrentType(type);
871
// NOTE(review): the declaration line assigning clRes (stripped from this
// listing) binds it to the fetched results container below.
873 (ResultsClassification*)Data()->GetResults(GetMethodName(), type, Types::kClassification );
874
875 Long64_t nEvents = Data()->GetNEvents();
876 clRes->Resize( nEvents );
877
878 // use timer
879 Timer timer( nEvents, GetName(), kTRUE );
880 std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);
881
882 // store time used for testing
884 SetTestTime(timer.ElapsedSeconds());
885
886 // load mva values and type to results object
887 for (Int_t ievt = 0; ievt < nEvents; ievt++) {
888 // note we do not need the trasformed event to get the signal/background information
889 // by calling Data()->GetEvent instead of this->GetEvent we access the untransformed one
890 auto ev = Data()->GetEvent(ievt);
891 clRes->SetValue(mvaValues[ievt], ievt, DataInfo().IsSignal(ev));
892 }
893}
894
895////////////////////////////////////////////////////////////////////////////////
896/// get all the MVA values for the events of the current Data type
898{
899
900 Long64_t nEvents = Data()->GetNEvents();
901 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
902 if (firstEvt < 0) firstEvt = 0;
903 std::vector<Double_t> values(lastEvt-firstEvt);
904 // log in case of looping on all the events
905 nEvents = values.size();
906
907 // use timer
908 Timer timer( nEvents, GetName(), kTRUE );
909
910 if (logProgress)
911 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
912 << "Evaluation of " << GetMethodName() << " on "
913 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
914 << " sample (" << nEvents << " events)" << Endl;
915
916 for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
917 Data()->SetCurrentEvent(ievt);
918 values[ievt] = GetMvaValue();
919
920 // print progress
921 if (logProgress) {
922 Int_t modulo = Int_t(nEvents/100);
923 if (modulo <= 0 ) modulo = 1;
924 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
925 }
926 }
927 if (logProgress) {
928 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
929 << "Elapsed time for evaluation of " << nEvents << " events: "
930 << timer.GetElapsedTime() << " " << Endl;
931 }
932
933 return values;
934}
935
936////////////////////////////////////////////////////////////////////////////////
937/// get all the MVA values for the events of the given Data type
938// (this is used by Method Category and it does not need to be re-implemented by derived classes )
940{
 // temporarily redirect evaluation to the caller-supplied dataset (the
 // same pattern fTmpEvent uses for single events), and always reset the
 // slot afterwards so later calls use the method's own data again
941 fTmpData = data;
942 auto result = GetMvaValues(firstEvt, lastEvt, logProgress);
943 fTmpData = nullptr;
944 return result;
945}
946
947////////////////////////////////////////////////////////////////////////////////
948/// prepare tree branch with the method's discriminating variable
949
951{
952 Data()->SetCurrentType(type);
953
 // NOTE(review): the declaration of `mvaProb` (initialised from the cast
 // on the next line, stored under the "prob_<method>" key) was dropped by
 // the HTML export — confirm against the original source.
955 (ResultsClassification*)Data()->GetResults(TString("prob_")+GetMethodName(), type, Types::kClassification );
956
957 Long64_t nEvents = Data()->GetNEvents();
958
959 // use timer
960 Timer timer( nEvents, GetName(), kTRUE );
961
962 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
963 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
964
965 mvaProb->Resize( nEvents );
966 Int_t modulo = Int_t(nEvents/100);
967 if (modulo <= 0 ) modulo = 1;
968 for (Int_t ievt=0; ievt<nEvents; ievt++) {
969
970 Data()->SetCurrentEvent(ievt);
 // GetProba is called with an assumed signal fraction of 0.5
971 Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
 // a negative probability signals that no valid probability could be
 // computed; the whole loop is abandoned in that case
972 if (proba < 0) break;
973 mvaProb->SetValue( proba, ievt, DataInfo().IsSignal( Data()->GetEvent()) );
974
975 // print progress
976 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
977 }
978
979 Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
980 << "Elapsed time for evaluation of " << nEvents << " events: "
981 << timer.GetElapsedTime() << " " << Endl;
982}
983
984////////////////////////////////////////////////////////////////////////////////
985/// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
986///
987/// - bias = average deviation
988/// - dev = average absolute deviation
989/// - rms = rms of deviation
990
 // NOTE(review): the signature was truncated by the HTML export; only the
 // `Double_t& corr` parameter line survives. The full parameter list
 // (bias/biasT/dev/devT/rms/rmsT/mInf/mInfT/corr, plus the tree type)
 // must be confirmed against the original source. The "T" variants are
 // the truncated statistics computed in the second loop below.
995 Double_t& corr,
997{
 // remember and restore the dataset's current tree type around the test
998 Types::ETreeType savedType = Data()->GetCurrentType();
999 Data()->SetCurrentType(type);
1000
1001 bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
1002 Double_t sumw = 0;
1003 Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
1004 const Int_t nevt = GetNEvents();
 // raw arrays buffer per-event regression value, target and weight for
 // the second (truncated) pass; freed at the end of the function
1005 Float_t* rV = new Float_t[nevt];
1006 Float_t* tV = new Float_t[nevt];
1007 Float_t* wV = new Float_t[nevt];
1008 Float_t xmin = 1e30, xmax = -1e30;
1009 Log() << kINFO << "Calculate regression for all events" << Endl;
1010 Timer timer( nevt, GetName(), kTRUE );
1011 Long64_t modulo = Long64_t(nevt / 100) + 1;
1012 for (Long64_t ievt=0; ievt<nevt; ievt++) {
1013
1014 const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
1015 Float_t t = ev->GetTarget(0);
1016 Float_t w = ev->GetWeight();
1017 Float_t r = GetRegressionValues()[0];
1018 Float_t d = (r-t);
1019
1020 // find min/max
 // NOTE(review): the xmin/xmax update statements were dropped by the
 // HTML export here — confirm against the original source.
1023
1024 // store for truncated RMS computation
1025 rV[ievt] = r;
1026 tV[ievt] = t;
1027 wV[ievt] = w;
1028
1029 // compute deviation-squared
1030 sumw += w;
1031 bias += w * d;
1032 dev += w * TMath::Abs(d);
1033 rms += w * d * d;
1034
1035 // compute correlation between target and regression estimate
1036 m1 += t*w; s1 += t*t*w;
1037 m2 += r*w; s2 += r*r*w;
 // NOTE(review): unlike the other accumulators, s12 is not weighted by
 // w here, yet it is divided by sumw below — looks inconsistent for
 // weighted samples; verify intent against the original source.
1038 s12 += t*r;
1039 // print progress
1040 if (ievt % modulo == 0)
1041 timer.DrawProgressBar(ievt);
1042 }
1043 timer.DrawProgressBar(nevt - 1);
1044 Log() << kINFO << "Elapsed time for evaluation of " << nevt << " events: "
1045 << timer.GetElapsedTime() << " " << Endl;
1046
1047 // standard quantities
1048 bias /= sumw;
1049 dev /= sumw;
1050 rms /= sumw;
 // NOTE(review): a line converting rms from <d^2> to an actual RMS
 // appears to have been dropped by the HTML export here.
1052
1053 // correlation
1054 m1 /= sumw;
1055 m2 /= sumw;
1056 corr = s12/sumw - m1*m2;
1057 corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1058
1059 // create histogram required for computation of mutual information
1060 TH2F* hist = new TH2F( "hist", "hist", 150, xmin, xmax, 100, xmin, xmax );
1061 TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );
1062
1063 // compute truncated RMS and fill histogram
 // only events whose deviation lies within bias +/- 2*rms enter the
 // truncated ("T") statistics and histT
1064 Double_t devMax = bias + 2*rms;
1065 Double_t devMin = bias - 2*rms;
1066 sumw = 0;
1067 for (Long64_t ievt=0; ievt<nevt; ievt++) {
1068 Float_t d = (rV[ievt] - tV[ievt]);
1069 hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
1070 if (d >= devMin && d <= devMax) {
1071 sumw += wV[ievt];
1072 biasT += wV[ievt] * d;
1073 devT += wV[ievt] * TMath::Abs(d);
1074 rmsT += wV[ievt] * d * d;
1075 histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
1076 }
1077 }
1078 biasT /= sumw;
1079 devT /= sumw;
1080 rmsT /= sumw;
 // NOTE(review): the truncated-RMS sqrt line and the mutual-information
 // computation for histT were dropped by the HTML export here.
1082 mInf = gTools().GetMutualInformation( *hist );
1084
1085 delete hist;
1086 delete histT;
1087
1088 delete [] rV;
1089 delete [] tV;
1090 delete [] wV;
1091
1092 Data()->SetCurrentType(savedType);
1093}
1094
1095
1096////////////////////////////////////////////////////////////////////////////////
1097/// test multiclass classification
1098
1100{
1101 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
1102 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1103
1104 // GA evaluation of best cut for sig eff * sig pur. Slow, disabled for now.
1105 // Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test
1106 // data..." << Endl; for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1107 // resMulticlass->GetBestMultiClassCuts(icls);
1108 // }
1109
1110 // Create histograms for use in TMVA GUI
1111 TString histNamePrefix(GetTestvarName());
 // NOTE(review): the declarations of histNamePrefixTest/histNamePrefixTrain
 // (presumably histNamePrefix + "_Test"/"_Train", mirroring the suffixes
 // used in the multiclass-output code earlier in this file) were dropped
 // by the HTML export — confirm against the original source.
1114
1115 resMulticlass->CreateMulticlassHistos(histNamePrefixTest, fNbinsMVAoutput, fNbinsH);
1116 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTest);
1117
1118 resMulticlass->CreateMulticlassHistos(histNamePrefixTrain, fNbinsMVAoutput, fNbinsH);
1119 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTrain);
1120}
1121
1122
1123////////////////////////////////////////////////////////////////////////////////
1124/// initialization
 /// (despite the label above, this routine fills the classification test
 /// histograms: MVA response, probability, rarity and high-binned
 /// efficiency histograms for signal and background, and builds the
 /// response PDFs)
1125
1127{
1128 Data()->SetCurrentType(Types::kTesting);
1129
 // NOTE(review): the declaration of `mvaRes` (initialised from the cast
 // continued on the next line) was dropped by the HTML export.
1131 ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
1132
1133 // sanity checks: tree must exist, and theVar must be in tree
1134 if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
1135 Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
1136 << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
1137 Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
1138 << " not found in tree" << Endl;
1139 }
1140
1141 // basic statistics operations are made in base class
1142 gTools().ComputeStat( GetEventCollection(Types::kTesting), mvaRes->GetValueVector(),
1143 fMeanS, fMeanB, fRmsS, fRmsB, fXmin, fXmax, fSignalClass );
1144
1145 // choose reasonable histogram ranges, by removing outliers
 // clip to within 10 RMS of either class mean, but never widen beyond
 // the observed min/max
1146 Double_t nrms = 10;
1147 fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
1148 fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );
1149
1150 // determine cut orientation
 // positive orientation = signal mean lies above background mean
1151 fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;
1152
1153 // fill 2 types of histograms for the various analyses
1154 // this one is for actual plotting
1155
 // tiny offset so values equal to fXmax land in the last bin
1156 Double_t sxmax = fXmax+0.00001;
1157
1158 // classifier response distributions for training sample
1159 // MVA plots used for graphics representation (signal)
 // NOTE(review): the declaration of `TestvarName` was dropped by the
 // HTML export before this if/else that initialises it.
1161 if(IsSilentFile()) {
1162 TestvarName = TString::Format("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1163 } else {
1164 TestvarName=GetTestvarName();
1165 }
1166 TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
1167 TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
1168 mvaRes->Store(mva_s, "MVA_S");
1169 mvaRes->Store(mva_b, "MVA_B");
1170 mva_s->Sumw2();
1171 mva_b->Sumw2();
1172
1173 TH1* proba_s = 0;
1174 TH1* proba_b = 0;
1175 TH1* rarity_s = 0;
1176 TH1* rarity_b = 0;
 // probability and rarity histograms only exist if the method provides
 // MVA PDFs
1177 if (HasMVAPdfs()) {
1178 // P(MVA) plots used for graphics representation
1179 proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1180 proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1181 mvaRes->Store(proba_s, "Prob_S");
1182 mvaRes->Store(proba_b, "Prob_B");
1183 proba_s->Sumw2();
1184 proba_b->Sumw2();
1185
1186 // R(MVA) plots used for graphics representation
1187 rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1188 rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1189 mvaRes->Store(rarity_s, "Rar_S");
1190 mvaRes->Store(rarity_b, "Rar_B");
1191 rarity_s->Sumw2();
1192 rarity_b->Sumw2();
1193 }
1194
1195 // MVA plots used for efficiency calculations (large number of bins)
1196 TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
1197 TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
1198 mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
1199 mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
1200 mva_eff_s->Sumw2();
1201 mva_eff_b->Sumw2();
1202
1203 // fill the histograms
1204
 // NOTE(review): the declaration of `mvaProb` (initialised from the cast
 // continued on the next line) was dropped by the HTML export.
1206 (Data()->GetResults( TString("prob_")+GetMethodName(), Types::kTesting, Types::kMaxAnalysisType ) );
1207
1208 Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
1209 if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
1210 //std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();
1211
1212 //LM: this is needed to avoid crashes in ROOCCURVE
1213 if ( mvaRes->GetSize() != GetNEvents() ) {
1214 Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
1215 assert(mvaRes->GetSize() == GetNEvents());
1216 }
1217
1218 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1219
1220 const Event* ev = GetEvent(ievt);
1221 Float_t v = (*mvaRes)[ievt][0];
1222 Float_t w = ev->GetWeight();
1223
1224 if (DataInfo().IsSignal(ev)) {
1225 //mvaResTypes->push_back(kTRUE);
1226 mva_s ->Fill( v, w );
1227 if (mvaProb) {
1228 proba_s->Fill( (*mvaProb)[ievt][0], w );
1229 rarity_s->Fill( GetRarity( v ), w );
1230 }
1231
1232 mva_eff_s ->Fill( v, w );
1233 }
1234 else {
1235 //mvaResTypes->push_back(kFALSE);
1236 mva_b ->Fill( v, w );
1237 if (mvaProb) {
1238 proba_b->Fill( (*mvaProb)[ievt][0], w );
1239 rarity_b->Fill( GetRarity( v ), w );
1240 }
1241 mva_eff_b ->Fill( v, w );
1242 }
1243 }
1244
1245 // uncomment those (and several others) if you want unnormalized output
1246 gTools().NormHist( mva_s );
1247 gTools().NormHist( mva_b );
1248 gTools().NormHist( proba_s );
1249 gTools().NormHist( proba_b );
 // NOTE(review): the corresponding NormHist calls for the rarity and
 // high-binned efficiency histograms were dropped by the HTML export.
1254
1255 // create PDFs from histograms, using default splines, and no additional smoothing
1256 if (fSplS) { delete fSplS; fSplS = 0; }
1257 if (fSplB) { delete fSplB; fSplB = 0; }
1258 fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
1259 fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
1260}
1261
1262////////////////////////////////////////////////////////////////////////////////
1263/// general method used in writing the header of the weight files where
1264/// the used variables, variable transformation type etc. is specified
1265
1266void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1267{
1268 TString prefix = "";
 // NOTE(review): the declaration of `userInfo` (deleted at the end of
 // this function, and whose fUser field is printed below) was dropped by
 // the HTML export — confirm against the original source.
1270
1271 tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1272 tf << prefix << "Method : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1273 tf.setf(std::ios::left);
1274 tf << prefix << "TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
1275 << GetTrainingTMVAVersionCode() << "]" << std::endl;
1276 tf << prefix << "ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
1277 << GetTrainingROOTVersionCode() << "]" << std::endl;
1278 tf << prefix << "Creator : " << userInfo->fUser << std::endl;
1279 tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1280 tf << prefix << "Host : " << gSystem->GetBuildNode() << std::endl;
1281 tf << prefix << "Dir : " << gSystem->WorkingDirectory() << std::endl;
1282 tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1283
 // NOTE(review): `analysisType` below is never used — the next line
 // recomputes the same expression inline; candidate for cleanup.
1284 TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1285
1286 tf << prefix << "Analysis type : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1287 tf << prefix << std::endl;
1288
1289 delete userInfo;
1290
1291 // First write all options
1292 tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1293 WriteOptionsToStream( tf, prefix );
1294 tf << prefix << std::endl;
1295
1296 // Second write variable info
1297 tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1298 WriteVarsToStream( tf, prefix );
1299 tf << prefix << std::endl;
1300}
1301
1302////////////////////////////////////////////////////////////////////////////////
1303/// xml writing
1304
1305void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1306{
1307 void* it = gTools().AddChild(gi,"Info");
1308 gTools().AddAttr(it,"name", name);
1309 gTools().AddAttr(it,"value", value);
1310}
1311
1312////////////////////////////////////////////////////////////////////////////////
 /// Dispatch the output-filling step to the routine matching the analysis
 /// type: regression, multiclass, or (default) classification — the latter
 /// additionally filling probability output when the method has MVA PDFs.
 /// NOTE(review): the signature/opening line was dropped by the HTML
 /// export — presumably `AddOutput( Types::ETreeType type,
 /// Types::EAnalysisType analysisType )`; confirm against the source.
1313
1315 if (analysisType == Types::kRegression) {
1316 AddRegressionOutput( type );
1317 } else if (analysisType == Types::kMulticlass) {
1318 AddMulticlassOutput( type );
1319 } else {
1320 AddClassifierOutput( type );
1321 if (HasMVAPdfs())
1322 AddClassifierOutputProb( type );
1323 }
1324}
1325
1326////////////////////////////////////////////////////////////////////////////////
1327/// general method used in writing the header of the weight files where
1328/// the used variables, variable transformation type etc. is specified
1329
1330void TMVA::MethodBase::WriteStateToXML( void* parent ) const
1331{
1332 if (!parent) return;
1333
 // NOTE(review): the declaration of `userInfo` (deleted below, fUser
 // written into the "Creator" item) was dropped by the HTML export.
1335
1336 void* gi = gTools().AddChild(parent, "GeneralInfo");
1337 AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
1338 AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
1339 AddInfoItem( gi, "Creator", userInfo->fUser);
1340 TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
1341 AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
1342 AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
1343 AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
1344 AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));
1345
1346 Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
1347 TString analysisType((aType==Types::kRegression) ? "Regression" :
1348 (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
1349 AddInfoItem( gi, "AnalysisType", analysisType );
1350 delete userInfo;
1351
1352 // write options
1353 AddOptionsXMLTo( parent );
1354
1355 // write variable info
1356 AddVarsXMLTo( parent );
1357
1358 // write spectator info
1359 if (fModelPersistence)
1360 AddSpectatorsXMLTo( parent );
1361
1362 // write class info if in multiclass mode
1363 AddClassesXMLTo(parent);
1364
1365 // write target info if in regression mode
1366 if (DoRegression()) AddTargetsXMLTo(parent);
1367
1368 // write transformations
1369 GetTransformationHandler(false).AddXMLTo( parent );
1370
1371 // write MVA variable distributions
1372 void* pdfs = gTools().AddChild(parent, "MVAPdfs");
1373 if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1374 if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1375
1376 // write weights
1377 AddWeightsXMLTo( parent );
1378}
1379
1380////////////////////////////////////////////////////////////////////////////////
1381/// write reference MVA distributions (and other information)
1382/// to a ROOT type weight file
 /// (NOTE(review): despite the wording above, this routine READS the MVA
 /// PDFs and weights back from a ROOT file; the signature line and the
 /// save/restore of the previous TH1::AddDirectoryStatus() appear to have
 /// been dropped by the HTML export.)
1383
1385{
1387 TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
1388 fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1389 fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1390
1392
1393 ReadWeightsFromStream( rf );
1394
1395 SetTestvarName();
1396}
1397
1398////////////////////////////////////////////////////////////////////////////////
1399/// write options and weights to file
1400/// note that each one text file for the main configuration information
1401/// and one ROOT file for ROOT objects are created
1402
1404{
1405 // ---- create the text file
1406 TString tfname( GetWeightFileName() );
1407
1408 // writing xml file
 // the xml file shares the weight-file base name, with .txt -> .xml
1409 TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1410 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1411 << "Creating xml weight file: "
1412 << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1413 void* doc = gTools().xmlengine().NewDoc();
1414 void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1415 gTools().xmlengine().DocSetRootElement(doc,rootnode);
1416 gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1417 WriteStateToXML(rootnode);
 // NOTE(review): the lines saving the XML document to xmlfname and
 // freeing `doc` were dropped by the HTML export — without them this
 // would leak the document and write nothing; confirm against source.
1420}
1421
1422////////////////////////////////////////////////////////////////////////////////
1423/// Function to write options and weights to file
 /// (NOTE(review): despite the wording above, this routine READS the
 /// method state back: XML or legacy text weight file, plus an optional
 /// companion ROOT file. The signature line was dropped by the export.)
1424
1426{
1427 // get the filename
1428
1429 TString tfname(GetWeightFileName());
1430
1431 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1432 << "Reading weight file: "
1433 << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1434
 // XML weight files are parsed with TXMLEngine; anything else is treated
 // as the legacy text format below
1435 if (tfname.EndsWith(".xml") ) {
1436 void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1437 if (!doc) {
1438 Log() << kFATAL << "Error parsing XML file " << tfname << Endl;
1439 }
1440 void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1441 ReadStateFromXML(rootnode);
 // NOTE(review): the FreeDoc(doc) call appears to have been dropped by
 // the HTML export here.
1443 }
1444 else {
1445 std::filebuf fb;
1446 fb.open(tfname.Data(),std::ios::in);
1447 if (!fb.is_open()) { // file not found --> Error
1448 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1449 << "Unable to open input weight file: " << tfname << Endl;
1450 }
1451 std::istream fin(&fb);
1452 ReadStateFromStream(fin);
1453 fb.close();
1454 }
1455 if (!fTxtWeightsOnly) {
1456 // ---- read the ROOT file
1457 TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1458 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1459 << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1460 TFile* rfile = TFile::Open( rfname, "READ" );
1461 ReadStateFromStream( *rfile );
1462 rfile->Close();
1463 }
1464}
1465////////////////////////////////////////////////////////////////////////////////
1466/// for reading from memory
 /// parses the given in-memory XML string and restores the method state
 /// from its root "MethodSetup" node (signature line missing from this
 /// extract; the FreeDoc call after ReadStateFromXML also appears dropped)
1467
1469 void* doc = gTools().xmlengine().ParseString(xmlstr);
1470 void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1471 ReadStateFromXML(rootnode);
1473
1474 return;
1475}
1476
1477////////////////////////////////////////////////////////////////////////////////
 /// Restore the full method state from a "MethodSetup" XML node: general
 /// info (analysis type, training versions), options, variables,
 /// spectators, classes, targets, transformations, MVA PDFs and weights.
 /// NOTE(review): the signature line and the declaration of
 /// `fullMethodName` were dropped by the HTML export.
1478
1480{
1481
1483 gTools().ReadAttr( methodNode, "Method", fullMethodName );
1484
 // the attribute has the form "<MethodType>::<MethodName>"; keep the part
 // after "::" as the method name
1485 fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1486
1487 // update logger
1488 Log().SetSource( GetName() );
1489 Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1490 << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1491
1492 // after the method name is read, the testvar can be set
1493 SetTestvarName();
1494
 // walk over the child nodes and dispatch on the node name
1495 TString nodeName("");
1496 void* ch = gTools().GetChild(methodNode);
1497 while (ch!=0) {
1498 nodeName = TString( gTools().GetName(ch) );
1499
1500 if (nodeName=="GeneralInfo") {
1501 // read analysis type
1502
1503 TString name(""),val("");
1504 void* antypeNode = gTools().GetChild(ch);
1505 while (antypeNode) {
1506 gTools().ReadAttr( antypeNode, "name", name );
1507
1508 if (name == "TrainingTime")
1509 gTools().ReadAttr( antypeNode, "value", fTrainTime );
1510
1511 if (name == "AnalysisType") {
1512 gTools().ReadAttr( antypeNode, "value", val );
1513 val.ToLower();
1514 if (val == "regression" ) SetAnalysisType( Types::kRegression );
1515 else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1516 else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1517 else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1518 }
1519
1520 if (name == "TMVA Release" || name == "TMVA") {
1521 TString s;
1522 gTools().ReadAttr( antypeNode, "value", s);
 // extract the numeric version code between the '[' and ']'
1523 fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1524 Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1525 }
1526
1527 if (name == "ROOT Release" || name == "ROOT") {
1528 TString s;
1529 gTools().ReadAttr( antypeNode, "value", s);
1530 fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1531 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1532 << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1533 }
 // NOTE(review): the advance of `antypeNode` to the next child was
 // dropped by the HTML export here.
1535 }
1536 }
1537 else if (nodeName=="Options") {
1538 ReadOptionsFromXML(ch);
1539 ParseOptions();
1540
1541 }
1542 else if (nodeName=="Variables") {
1543 ReadVariablesFromXML(ch);
1544 }
1545 else if (nodeName=="Spectators") {
1546 ReadSpectatorsFromXML(ch);
1547 }
1548 else if (nodeName=="Classes") {
1549 if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1550 }
1551 else if (nodeName=="Targets") {
1552 if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1553 }
1554 else if (nodeName=="Transformations") {
1555 GetTransformationHandler().ReadFromXML(ch);
1556 }
1557 else if (nodeName=="MVAPdfs") {
 // NOTE(review): the declaration of `pdfname` and the advance of
 // `pdfnode` between the signal and background PDF reads were
 // dropped by the HTML export in this branch.
1559 if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1560 if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1561 void* pdfnode = gTools().GetChild(ch);
1562 if (pdfnode) {
1563 gTools().ReadAttr(pdfnode, "Name", pdfname);
1564 fMVAPdfS = new PDF(pdfname);
1565 fMVAPdfS->ReadXML(pdfnode);
1567 gTools().ReadAttr(pdfnode, "Name", pdfname);
1568 fMVAPdfB = new PDF(pdfname);
1569 fMVAPdfB->ReadXML(pdfnode);
1570 }
1571 }
1572 else if (nodeName=="Weights") {
1573 ReadWeightsFromXML(ch);
1574 }
1575 else {
1576 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1577 }
1578 ch = gTools().GetNextChild(ch);
1579
1580 }
1581
1582 // update transformation handler
1583 if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1584}
1585
1586////////////////////////////////////////////////////////////////////////////////
1587/// read the header from the weight files of the different MVA methods
 /// (legacy text-format reader; the signature line — presumably taking a
 /// std::istream& `fin` — was dropped by the HTML export)
1588
1590{
1591 char buf[512];
1592
1593 // when reading from stream, we assume the files are produced with TMVA<=397
1594 SetAnalysisType(Types::kClassification);
1595
1596
1597 // first read the method name
 // skip forward to the first line starting with "Method"
1598 GetLine(fin,buf);
1599 while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1600 TString namestr(buf);
1601
1602 TString methodType = namestr(0,namestr.Index("::"));
1603 methodType = methodType(methodType.Last(' '),methodType.Length());
 // NOTE(review): a line further normalising `methodType` (stripping
 // whitespace) appears to have been dropped by the HTML export here.
1605
1606 TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1607 methodName = methodName.Strip(TString::kLeading);
1608 if (methodName == "") methodName = methodType;
1609 fMethodName = methodName;
1610
1611 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1612
1613 // update logger
1614 Log().SetSource( GetName() );
1615
1616 // now the question is whether to read the variables first or the options (well, of course the order
1617 // of writing them needs to agree)
1618 //
1619 // the option "Decorrelation" is needed to decide if the variables we
1620 // read are decorrelated or not
1621 //
1622 // the variables are needed by some methods (TMLP) to build the NN
1623 // which is done in ProcessOptions so for the time being we first Read and Parse the options then
1624 // we read the variables, and then we process the options
1625
1626 // now read all options
1627 GetLine(fin,buf);
1628 while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1629 ReadOptionsFromStream(fin);
1630 ParseOptions();
1631
1632 // Now read variable info
1633 fin.getline(buf,512);
1634 while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1635 ReadVarsFromStream(fin);
1636
1637 // now we process the options (of the derived class)
1638 ProcessOptions();
1639
1640 if (IsNormalised()) {
 // NOTE(review): the declaration of `norm` (initialised from the cast
 // of the AddTransformation call below) was dropped by the HTML export.
1642 GetTransformationHandler().AddTransformation( new VariableNormalizeTransform(DataInfo()), -1 );
1643 norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1644 }
 // NOTE(review): the declarations of `varTrafo`/`varTrafo2` (null-
 // initialised transformation pointers assigned in the chain below)
 // were dropped by the HTML export.
1646 if ( fVarTransformString == "None") {
1647 if (fUseDecorr)
1648 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1649 } else if ( fVarTransformString == "Decorrelate" ) {
1650 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1651 } else if ( fVarTransformString == "PCA" ) {
1652 varTrafo = GetTransformationHandler().AddTransformation( new VariablePCATransform(DataInfo()), -1 );
1653 } else if ( fVarTransformString == "Uniform" ) {
1654 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1655 } else if ( fVarTransformString == "Gauss" ) {
1656 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1657 } else if ( fVarTransformString == "GaussDecorr" ) {
1658 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1659 varTrafo2 = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1660 } else {
1661 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1662 << fVarTransformString << "' unknown." << Endl;
1663 }
1664 // Now read decorrelation matrix if available
1665 if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1666 fin.getline(buf,512);
1667 while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1668 if (varTrafo) {
1669 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1670 varTrafo->ReadTransformationFromStream(fin, trafo );
1671 }
1672 if (varTrafo2) {
1673 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1674 varTrafo2->ReadTransformationFromStream(fin, trafo );
1675 }
1676 }
1677
1678
1679 if (HasMVAPdfs()) {
1680 // Now read the MVA PDFs
1681 fin.getline(buf,512);
1682 while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1683 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1684 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1685 fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1686 fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
 // the reading version steers backward-compatible PDF parsing
1687 fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
1688 fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
1689
1690 fin >> *fMVAPdfS;
1691 fin >> *fMVAPdfB;
1692 }
1693
1694 // Now read weights
1695 fin.getline(buf,512);
1696 while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1697 fin.getline(buf,512);
1698 ReadWeightsFromStream( fin );
1699
1700 // update transformation handler
1701 if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1702
1703}
1704
1705////////////////////////////////////////////////////////////////////////////////
1706/// write the list of variables (name, min, max) for a given data
1707/// transformation method to the stream
1708
1709void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1710{
1711 o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1712 std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1713 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1714 o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1715 varIt = DataInfo().GetSpectatorInfos().begin();
1716 for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1717}
1718
1719////////////////////////////////////////////////////////////////////////////////
1720/// Read the variables (name, min, max) for a given data
1721/// transformation method from the stream. In the stream we only
1722/// expect the limits which will be set
 /// (NOTE(review): the signature line and the declarations of `readNVar`
 /// and `varInfo` were dropped by the HTML export.)
1723
1725{
1726 TString dummy;
1728 istr >> dummy >> readNVar;
1729
 // the file must declare exactly the variables registered in the Reader
1730 if (readNVar!=DataInfo().GetNVariables()) {
1731 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1732 << " while there are " << readNVar << " variables declared in the file"
1733 << Endl;
1734 }
1735
1736 // we want to make sure all variables are read in the order they are defined
1738 std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1739 int varIdx = 0;
1740 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
1741 varInfo.ReadFromStream(istr);
 // match by expression: on success, carry the Reader's external link
 // over into the file's variable info and adopt it; any mismatch in
 // name or order is fatal
1742 if (varIt->GetExpression() == varInfo.GetExpression()) {
1743 varInfo.SetExternalLink((*varIt).GetExternalLink());
1744 (*varIt) = varInfo;
1745 }
1746 else {
1747 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1748 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1749 Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1750 Log() << kINFO << "the correct working of the method):" << Endl;
1751 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1752 Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1753 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1754 }
1755 }
1756}
1757
1758////////////////////////////////////////////////////////////////////////////////
1759/// write variable info to XML
1760
1761void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1762{
1763 void* vars = gTools().AddChild(parent, "Variables");
1764 gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1765
1766 for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1767 VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1768 void* var = gTools().AddChild( vars, "Variable" );
1769 gTools().AddAttr( var, "VarIndex", idx );
1770 vi.AddToXML( var );
1771 }
1772}
1773
1774////////////////////////////////////////////////////////////////////////////////
1775/// write spectator info to XML
1776
1778{
1779 void* specs = gTools().AddChild(parent, "Spectators");
1780
1781 UInt_t writeIdx=0;
1782 for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1783
1784 VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1785
1786 // we do not want to write spectators that are category-cuts,
1787 // except if the method is the category method and the spectators belong to it
1788 if (vi.GetVarType()=='C') continue;
1789
1790 void* spec = gTools().AddChild( specs, "Spectator" );
1791 gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1792 vi.AddToXML( spec );
1793 }
1794 gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1795}
1796
1797////////////////////////////////////////////////////////////////////////////////
1798/// write class info to XML
1799
1800void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1801{
1802 UInt_t nClasses=DataInfo().GetNClasses();
1803
1804 void* classes = gTools().AddChild(parent, "Classes");
1805 gTools().AddAttr( classes, "NClass", nClasses );
1806
1807 for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1808 ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1809 TString className =classInfo->GetName();
1810 UInt_t classNumber=classInfo->GetNumber();
1811
1812 void* classNode=gTools().AddChild(classes, "Class");
1813 gTools().AddAttr( classNode, "Name", className );
1814 gTools().AddAttr( classNode, "Index", classNumber );
1815 }
1816}
1817////////////////////////////////////////////////////////////////////////////////
1818/// write target info to XML
1819
1820void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1821{
1822 void* targets = gTools().AddChild(parent, "Targets");
1823 gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1824
1825 for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1826 VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1827 void* tar = gTools().AddChild( targets, "Target" );
1828 gTools().AddAttr( tar, "TargetIndex", idx );
1829 vi.AddToXML( tar );
1830 }
1831}
1832
1833////////////////////////////////////////////////////////////////////////////////
1834/// read variable info from XML
1835
1837{
1839 gTools().ReadAttr( varnode, "NVar", readNVar);
1840
1841 if (readNVar!=DataInfo().GetNVariables()) {
1842 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1843 << " while there are " << readNVar << " variables declared in the file"
1844 << Endl;
1845 }
1846
1847 // we want to make sure all variables are read in the order they are defined
1849 int varIdx = 0;
1850 void* ch = gTools().GetChild(varnode);
1851 while (ch) {
1852 gTools().ReadAttr( ch, "VarIndex", varIdx);
1853 existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1854 readVarInfo.ReadFromXML(ch);
1855
1856 if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1857 readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
1859 }
1860 else {
1861 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1862 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1863 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1864 Log() << kINFO << "correct working of the method):" << Endl;
1865 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1866 Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1867 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1868 }
1869 ch = gTools().GetNextChild(ch);
1870 }
1871}
1872
1873////////////////////////////////////////////////////////////////////////////////
1874/// read spectator info from XML
1875
1877{
1879 gTools().ReadAttr( specnode, "NSpec", readNSpec);
1880
1881 if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1882 Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1883 << " while there are " << readNSpec << " spectators declared in the file"
1884 << Endl;
1885 }
1886
1887 // we want to make sure all variables are read in the order they are defined
1889 int specIdx = 0;
1890 void* ch = gTools().GetChild(specnode);
1891 while (ch) {
1892 gTools().ReadAttr( ch, "SpecIndex", specIdx);
1893 existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1894 readSpecInfo.ReadFromXML(ch);
1895
1896 if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1897 readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
1899 }
1900 else {
1901 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1902 Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1903 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1904 Log() << kINFO << "correct working of the method):" << Endl;
1905 Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1906 Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1907 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1908 }
1909 ch = gTools().GetNextChild(ch);
1910 }
1911}
1912
1913////////////////////////////////////////////////////////////////////////////////
1914/// read number of classes from XML
1915
1917{
1919 // coverity[tainted_data_argument]
1920 gTools().ReadAttr( clsnode, "NClass", readNCls);
1921
1922 TString className="";
1924 void* ch = gTools().GetChild(clsnode);
1925 if (!ch) {
1926 for (UInt_t icls = 0; icls<readNCls;++icls) {
1927 TString classname = TString::Format("class%i",icls);
1928 DataInfo().AddClass(classname);
1929
1930 }
1931 }
1932 else{
1933 while (ch) {
1934 gTools().ReadAttr( ch, "Index", classIndex);
1935 gTools().ReadAttr( ch, "Name", className );
1936 DataInfo().AddClass(className);
1937
1938 ch = gTools().GetNextChild(ch);
1939 }
1940 }
1941
1942 // retrieve signal and background class index
1943 if (DataInfo().GetClassInfo("Signal") != 0) {
1944 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1945 }
1946 else
1947 fSignalClass=0;
1948 if (DataInfo().GetClassInfo("Background") != 0) {
1949 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
1950 }
1951 else
1952 fBackgroundClass=1;
1953}
1954
1955////////////////////////////////////////////////////////////////////////////////
1956/// read target info from XML
1957
1959{
1961 gTools().ReadAttr( tarnode, "NTrgt", readNTar);
1962
1963 int tarIdx = 0;
1964 TString expression;
1965 void* ch = gTools().GetChild(tarnode);
1966 while (ch) {
1967 gTools().ReadAttr( ch, "TargetIndex", tarIdx);
1968 gTools().ReadAttr( ch, "Expression", expression);
1969 DataInfo().AddTarget(expression,"","",0,0);
1970
1971 ch = gTools().GetNextChild(ch);
1972 }
1973}
1974
1975////////////////////////////////////////////////////////////////////////////////
1976/// returns the ROOT directory where info/histograms etc of the
1977/// corresponding MVA method instance are stored
1978
1980{
1981 if (fBaseDir != 0) return fBaseDir;
1982 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
1983
1984 if (IsSilentFile()) {
1985 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
1986 << "MethodBase::BaseDir() - No directory exists when running a Method without output file. Enable the "
1987 "output when creating the factory"
1988 << Endl;
1989 }
1990
1991 TDirectory* methodDir = MethodBaseDir();
1992 if (methodDir==0)
1993 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" << Endl;
1994
1995 TString defaultDir = GetMethodName();
1996 TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
1997 if(!sdir)
1998 {
1999 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet--> created it" <<Endl;
2000 sdir = methodDir->mkdir(defaultDir);
2001 sdir->cd();
2002 // write weight file name into target file
2003 if (fModelPersistence) {
2005 TObjString wfileName( GetWeightFileName() );
2006 wfilePath.Write( "TrainingPath" );
2007 wfileName.Write( "WeightFileName" );
2008 }
2009 }
2010
2011 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
2012 return sdir;
2013}
2014
2015////////////////////////////////////////////////////////////////////////////////
2016/// returns the ROOT directory where all instances of the
2017/// corresponding MVA method are stored
2018
2020{
2021 if (fMethodBaseDir != 0) {
2022 return fMethodBaseDir;
2023 }
2024
2025 const char *datasetName = DataInfo().GetName();
2026
2027 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodTypeName()
2028 << " not set yet --> check if already there.." << Endl;
2029
2030 TDirectory *factoryBaseDir = GetFile();
2031 if (!factoryBaseDir) return nullptr;
2032 fMethodBaseDir = factoryBaseDir->GetDirectory(datasetName);
2033 if (!fMethodBaseDir) {
2034 fMethodBaseDir = factoryBaseDir->mkdir(datasetName, TString::Format("Base directory for dataset %s", datasetName).Data());
2035 if (!fMethodBaseDir) {
2036 Log() << kFATAL << "Can not create dir " << datasetName;
2037 }
2038 }
2039 TString methodTypeDir = TString::Format("Method_%s", GetMethodTypeName().Data());
2040 fMethodBaseDir = fMethodBaseDir->GetDirectory(methodTypeDir.Data());
2041
2042 if (!fMethodBaseDir) {
2044 TString methodTypeDirHelpStr = TString::Format("Directory for all %s methods", GetMethodTypeName().Data());
2045 fMethodBaseDir = datasetDir->mkdir(methodTypeDir.Data(), methodTypeDirHelpStr);
2046 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodName()
2047 << " does not exist yet--> created it" << Endl;
2048 }
2049
2050 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName)
2051 << "Return from MethodBaseDir() after creating base directory " << Endl;
2052 return fMethodBaseDir;
2053}
2054
2055////////////////////////////////////////////////////////////////////////////////
2056/// set directory of weight file
2057
2059{
2060 fFileDir = fileDir;
2061 gSystem->mkdir( fFileDir, kTRUE );
2062}
2063
2064////////////////////////////////////////////////////////////////////////////////
2065/// set the weight file name (depreciated)
2066
2071
2072////////////////////////////////////////////////////////////////////////////////
2073/// retrieve weight file name
2074
2076{
2077 if (fWeightFile!="") return fWeightFile;
2078
2079 // the default consists of
2080 // directory/jobname_methodname_suffix.extension.{root/txt}
2081 TString suffix = "";
2082 TString wFileDir(GetWeightFileDir());
2083 TString wFileName = GetJobName() + "_" + GetMethodName() +
2084 suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml";
2085 if (wFileDir.IsNull() ) return wFileName;
2086 // add weight file directory of it is not null
2087 return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2088 + wFileName );
2089}
2090////////////////////////////////////////////////////////////////////////////////
2091/// writes all MVA evaluation histograms to file
2092
2094{
2095 BaseDir()->cd();
2096
2097
2098 // write MVA PDFs to file - if exist
2099 if (0 != fMVAPdfS) {
2100 fMVAPdfS->GetOriginalHist()->Write();
2101 fMVAPdfS->GetSmoothedHist()->Write();
2102 fMVAPdfS->GetPDFHist()->Write();
2103 }
2104 if (0 != fMVAPdfB) {
2105 fMVAPdfB->GetOriginalHist()->Write();
2106 fMVAPdfB->GetSmoothedHist()->Write();
2107 fMVAPdfB->GetPDFHist()->Write();
2108 }
2109
2110 // write result-histograms
2111 Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2112 if (!results)
2113 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2114 << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2115 << "/kMaxAnalysisType" << Endl;
2116 results->GetStorage()->Write();
2118 // skipping plotting of variables if too many (default is 200)
2119 if ((int) DataInfo().GetNVariables()< gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables)
2120 GetTransformationHandler().PlotVariables (GetEventCollection( Types::kTesting ), BaseDir() );
2121 else
2122 Log() << kINFO << TString::Format("Dataset[%s] : ",DataInfo().GetName())
2123 << " variable plots are not produces ! The number of variables is " << DataInfo().GetNVariables()
2124 << " , it is larger than " << gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables << Endl;
2125 }
2126}
2127
2128////////////////////////////////////////////////////////////////////////////////
2129/// write special monitoring histograms to file
2130/// dummy implementation here -----------------
2131
2135
2136////////////////////////////////////////////////////////////////////////////////
2137/// reads one line from the input stream
2138/// checks for certain keywords and interprets
2139/// the line if keywords are found
2140
2141Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2142{
2143 fin.getline(buf,512);
2144 TString line(buf);
2145 if (line.BeginsWith("TMVA Release")) {
2146 Ssiz_t start = line.First('[')+1;
2147 Ssiz_t length = line.Index("]",start)-start;
2148 TString code = line(start,length);
2149 std::stringstream s(code.Data());
2150 s >> fTMVATrainingVersion;
2151 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2152 }
2153 if (line.BeginsWith("ROOT Release")) {
2154 Ssiz_t start = line.First('[')+1;
2155 Ssiz_t length = line.Index("]",start)-start;
2156 TString code = line(start,length);
2157 std::stringstream s(code.Data());
2158 s >> fROOTTrainingVersion;
2159 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2160 }
2161 if (line.BeginsWith("Analysis type")) {
2162 Ssiz_t start = line.First('[')+1;
2163 Ssiz_t length = line.Index("]",start)-start;
2164 TString code = line(start,length);
2165 std::stringstream s(code.Data());
2166 std::string analysisType;
2167 s >> analysisType;
2168 if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2169 else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2170 else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2171 else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2172
2173 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2174 << (GetAnalysisType() == Types::kRegression ? "Regression" :
2175 (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2176 }
2177
2178 return true;
2179}
2180
2181////////////////////////////////////////////////////////////////////////////////
2182/// Create PDFs of the MVA output variables
2183
2185{
2186 Data()->SetCurrentType(Types::kTraining);
2187
2188 // the PDF's are stored as results ONLY if the corresponding "results" are booked,
2189 // otherwise they will be only used 'online'
2191 ( Data()->GetResults(GetMethodName(), Types::kTraining, Types::kClassification) );
2192
2193 if (mvaRes==0 || mvaRes->GetSize()==0) {
2194 Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2195 }
2196
2197 Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2198 Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2199
2200 // create histograms that serve as basis to create the MVA Pdfs
2201 TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2202 fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2203 TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2204 fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2205
2206
2207 // compute sum of weights properly
2208 histMVAPdfS->Sumw2();
2209 histMVAPdfB->Sumw2();
2210
2211 // fill histograms
2212 for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2213 Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2214 Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2215
2216 if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2217 else histMVAPdfB->Fill( theVal, theWeight );
2218 }
2219
2222
2223 // momentary hack for ROOT problem
2224 if(!IsSilentFile())
2225 {
2226 histMVAPdfS->Write();
2227 histMVAPdfB->Write();
2228 }
2229 // create PDFs
2230 fMVAPdfS->BuildPDF ( histMVAPdfS );
2231 fMVAPdfB->BuildPDF ( histMVAPdfB );
2232 fMVAPdfS->ValidatePDF( histMVAPdfS );
2233 fMVAPdfB->ValidatePDF( histMVAPdfB );
2234
2235 if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2236 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2237 << TString::Format( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2238 GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2239 << Endl;
2240 }
2241
2242 delete histMVAPdfS;
2243 delete histMVAPdfB;
2244}
2245
2247 // the simple one, automatically calculates the mvaVal and uses the
2248 // SAME sig/bkg ratio as given in the training sample (typically 50/50
2249 // .. (NormMode=EqualNumEvents) but can be different)
2250 if (!fMVAPdfS || !fMVAPdfB) {
2251 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
2252 CreateMVAPdfs();
2253 }
2254 Double_t sigFraction = DataInfo().GetTrainingSumSignalWeights() / (DataInfo().GetTrainingSumSignalWeights() + DataInfo().GetTrainingSumBackgrWeights() );
2255 Double_t mvaVal = GetMvaValue(ev);
2256
2257 return GetProba(mvaVal,sigFraction);
2258
2259}
2260////////////////////////////////////////////////////////////////////////////////
2261/// compute likelihood ratio
2262
2264{
2265 if (!fMVAPdfS || !fMVAPdfB) {
2266 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2267 return -1.0;
2268 }
2269 Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2270 Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2271
2272 Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2273
2274 return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2275}
2276
2277////////////////////////////////////////////////////////////////////////////////
2278/// compute rarity:
2279/// \f[
2280/// R(x) = \int_{[-\infty..x]} { PDF(x') dx' }
2281/// \f]
2282/// where PDF(x) is the PDF of the classifier's signal or background distribution
2283
2285{
2286 if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2287 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2288 << "select option \"CreateMVAPdfs\"" << Endl;
2289 return 0.0;
2290 }
2291
2292 PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2293
2294 return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2295}
2296
2297////////////////////////////////////////////////////////////////////////////////
2298/// fill background efficiency (resp. rejection) versus signal efficiency plots
2299/// returns signal efficiency at background efficiency indicated in theString
2300
2302{
2303 Data()->SetCurrentType(type);
2304 Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
2305 std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2306
2307 // parse input string for required background efficiency
2309
2310 // sanity check
2312 if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2313 else if (list->GetSize() > 2) {
2314 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2315 << " in string: " << theString
2316 << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2317 delete list;
2318 return -1;
2319 }
2320
2321 // sanity check
2322 if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2323 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2324 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2325 delete list;
2326 return -1.0;
2327 }
2328
2329 // create histograms
2330
2331 // first, get efficiency histograms for signal and background
2332 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2333 Double_t xmin = effhist->GetXaxis()->GetXmin();
2334 Double_t xmax = effhist->GetXaxis()->GetXmax();
2335
2337
2338 // first round ? --> create histograms
2339 if (results->DoesExist("MVA_EFF_S")==0) {
2340
2341 // for efficiency plot
2342 TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2343 TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2344 results->Store(eff_s, "MVA_EFF_S");
2345 results->Store(eff_b, "MVA_EFF_B");
2346
2347 // sign if cut
2348 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2349
2350 // this method is unbinned
2351 nevtS = 0;
2352 for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2353
2354 // read the tree
2355 Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2356 Float_t theWeight = GetEvent(ievt)->GetWeight();
2357 Float_t theVal = (*mvaRes)[ievt];
2358
2359 // select histogram depending on if sig or bgd
2361
2362 // count signal and background events in tree
2363 if (isSignal) nevtS+=theWeight;
2364
2365 TAxis* axis = theHist->GetXaxis();
2366 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2367 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2368 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2369 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2370 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2371
2372 if (sign > 0)
2373 for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2374 else if (sign < 0)
2375 for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2376 else
2377 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2378 }
2379
2380 // renormalise maximum to <=1
2381 // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2382 // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2383
2384 eff_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_s->GetMaximum()) );
2385 eff_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_b->GetMaximum()) );
2386
2387 // background efficiency versus signal efficiency
2388 TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2389 results->Store(eff_BvsS, "MVA_EFF_BvsS");
2390 eff_BvsS->SetXTitle( "Signal eff" );
2391 eff_BvsS->SetYTitle( "Backgr eff" );
2392
2393 // background rejection (=1-eff.) versus signal efficiency
2394 TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2395 results->Store(rej_BvsS);
2396 rej_BvsS->SetXTitle( "Signal eff" );
2397 rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2398
2399 // inverse background eff (1/eff.) versus signal efficiency
2400 TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2401 GetTestvarName(), fNbins, 0, 1 );
2402 results->Store(inveff_BvsS);
2403 inveff_BvsS->SetXTitle( "Signal eff" );
2404 inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2405
2406 // use root finder
2407 // spline background efficiency plot
2408 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2410 fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2411 fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2412
2413 // verify spline sanity
2414 gTools().CheckSplines( eff_s, fSplRefS );
2415 gTools().CheckSplines( eff_b, fSplRefB );
2416 }
2417
2418 // make the background-vs-signal efficiency plot
2419
2420 // create root finder
2421 RootFinder rootFinder( this, fXmin, fXmax );
2422
2423 Double_t effB = 0;
2424 fEffS = eff_s; // to be set for the root finder
2425 for (Int_t bini=1; bini<=fNbins; bini++) {
2426
2427 // find cut value corresponding to a given signal efficiency
2428 Double_t effS = eff_BvsS->GetBinCenter( bini );
2429 Double_t cut = rootFinder.Root( effS );
2430
2431 // retrieve background efficiency for given cut
2432 if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2433 else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2434
2435 // and fill histograms
2436 eff_BvsS->SetBinContent( bini, effB );
2437 rej_BvsS->SetBinContent( bini, 1.0-effB );
2438 if (effB>std::numeric_limits<double>::epsilon())
2439 inveff_BvsS->SetBinContent( bini, 1.0/effB );
2440 }
2441
2442 // create splines for histogram
2443 fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2444
2445 // search for overlap point where, when cutting on it,
2446 // one would obtain: eff_S = rej_B = 1 - eff_B
2447 Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2448 Int_t nbins_ = 5000;
2449 for (Int_t bini=1; bini<=nbins_; bini++) {
2450
2451 // get corresponding signal and background efficiencies
2452 effS = (bini - 0.5)/Float_t(nbins_);
2453 rejB = 1.0 - fSpleffBvsS->Eval( effS );
2454
2455 // find signal efficiency that corresponds to required background efficiency
2456 if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2457 effS_ = effS;
2458 rejB_ = rejB;
2459 }
2460
2461 // find cut that corresponds to signal efficiency and update signal-like criterion
2462 Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2463 SetSignalReferenceCut( cut );
2464 fEffS = 0;
2465 }
2466
2467 // must exist...
2468 if (0 == fSpleffBvsS) {
2469 delete list;
2470 return 0.0;
2471 }
2472
2473 // now find signal efficiency that corresponds to required background efficiency
2474 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2475 Int_t nbins_ = 1000;
2476
2477 if (computeArea) {
2478
2479 // compute area of rej-vs-eff plot
2480 Double_t integral = 0;
2481 for (Int_t bini=1; bini<=nbins_; bini++) {
2482
2483 // get corresponding signal and background efficiencies
2484 effS = (bini - 0.5)/Float_t(nbins_);
2485 effB = fSpleffBvsS->Eval( effS );
2486 integral += (1.0 - effB);
2487 }
2488 integral /= nbins_;
2489
2490 delete list;
2491 return integral;
2492 }
2493 else {
2494
2495 // that will be the value of the efficiency retured (does not affect
2496 // the efficiency-vs-bkg plot which is done anyway.
2497 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2498
2499 // find precise efficiency value
2500 for (Int_t bini=1; bini<=nbins_; bini++) {
2501
2502 // get corresponding signal and background efficiencies
2503 effS = (bini - 0.5)/Float_t(nbins_);
2504 effB = fSpleffBvsS->Eval( effS );
2505
2506 // find signal efficiency that corresponds to required background efficiency
2507 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2508 effS_ = effS;
2509 effB_ = effB;
2510 }
2511
2512 // take mean between bin above and bin below
2513 effS = 0.5*(effS + effS_);
2514
2515 effSerr = 0;
2516 if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2517
2518 delete list;
2519 return effS;
2520 }
2521
2522 return -1;
2523}
2524
2525////////////////////////////////////////////////////////////////////////////////
2526
2528{
2529 Data()->SetCurrentType(Types::kTraining);
2530
2531 Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, Types::kNoAnalysisType);
2532
2533 // fill background efficiency (resp. rejection) versus signal efficiency plots
2534 // returns signal efficiency at background efficiency indicated in theString
2535
2536 // parse input string for required background efficiency
2538 // sanity check
2539
2540 if (list->GetSize() != 2) {
2541 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2542 << " in string: " << theString
2543 << " | required format, e.g., Efficiency:0.05" << Endl;
2544 delete list;
2545 return -1;
2546 }
2547 // that will be the value of the efficiency retured (does not affect
2548 // the efficiency-vs-bkg plot which is done anyway.
2549 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2550
2551 delete list;
2552
2553 // sanity check
2554 if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2555 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2556 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2557 << Endl;
2558 return -1.0;
2559 }
2560
2561 // create histogram
2562
2563 // first, get efficiency histograms for signal and background
2564 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2565 Double_t xmin = effhist->GetXaxis()->GetXmin();
2566 Double_t xmax = effhist->GetXaxis()->GetXmax();
2567
2568 // first round ? --> create and fill histograms
2569 if (results->DoesExist("MVA_TRAIN_S")==0) {
2570
2571 // classifier response distributions for test sample
2572 Double_t sxmax = fXmax+0.00001;
2573
2574 // MVA plots on the training sample (check for overtraining)
2575 TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2576 TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2577 results->Store(mva_s_tr, "MVA_TRAIN_S");
2578 results->Store(mva_b_tr, "MVA_TRAIN_B");
2579 mva_s_tr->Sumw2();
2580 mva_b_tr->Sumw2();
2581
2582 // Training efficiency plots
2583 TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2584 fNbinsH, xmin, xmax );
2585 TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2586 fNbinsH, xmin, xmax );
2587 results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2588 results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2589
2590 // sign if cut
2591 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2592
2593 std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2594 assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2595
2596 // this method is unbinned
2597 for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2598
2599 Data()->SetCurrentEvent(ievt);
2600 const Event* ev = GetEvent();
2601
2602 Double_t theVal = mvaValues[ievt];
2603 Double_t theWeight = ev->GetWeight();
2604
2605 TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2606 TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2607
2608 theClsHist->Fill( theVal, theWeight );
2609
2610 TAxis* axis = theEffHist->GetXaxis();
2611 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2612 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2613 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2614 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2615 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2616
2617 if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2618 else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2619 }
2620
2621 // normalise output distributions
2622 // uncomment those (and several others if you want unnormalized output
2625
2626 // renormalise to maximum
2627 mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2628 mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2629
2630 // Training background efficiency versus signal efficiency
2631 TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2632 // Training background rejection (=1-eff.) versus signal efficiency
2633 TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2634 results->Store(eff_bvss, "EFF_BVSS_TR");
2635 results->Store(rej_bvss, "REJ_BVSS_TR");
2636
2637 // use root finder
2638 // spline background efficiency plot
2639 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2641 if (fSplTrainRefS) delete fSplTrainRefS;
2642 if (fSplTrainRefB) delete fSplTrainRefB;
2643 fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2644 fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2645
2646 // verify spline sanity
2647 gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2648 gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2649 }
2650
2651 // make the background-vs-signal efficiency plot
2652
2653 // create root finder
2654 RootFinder rootFinder(this, fXmin, fXmax );
2655
2656 Double_t effB = 0;
2657 fEffS = results->GetHist("MVA_TRAINEFF_S");
2658 for (Int_t bini=1; bini<=fNbins; bini++) {
2659
2660 // find cut value corresponding to a given signal efficiency
2661 Double_t effS = eff_bvss->GetBinCenter( bini );
2662
2663 Double_t cut = rootFinder.Root( effS );
2664
2665 // retrieve background efficiency for given cut
2666 if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2667 else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2668
2669 // and fill histograms
2670 eff_bvss->SetBinContent( bini, effB );
2671 rej_bvss->SetBinContent( bini, 1.0-effB );
2672 }
2673 fEffS = 0;
2674
2675 // create splines for histogram
2676 fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2677 }
2678
2679 // must exist...
2680 if (0 == fSplTrainEffBvsS) return 0.0;
2681
2682 // now find signal efficiency that corresponds to required background efficiency
2683 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2684 Int_t nbins_ = 1000;
2685 for (Int_t bini=1; bini<=nbins_; bini++) {
2686
2687 // get corresponding signal and background efficiencies
2688 effS = (bini - 0.5)/Float_t(nbins_);
2689 effB = fSplTrainEffBvsS->Eval( effS );
2690
2691 // find signal efficiency that corresponds to required background efficiency
2692 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2693 effS_ = effS;
2694 effB_ = effB;
2695 }
2696
2697 return 0.5*(effS + effS_); // the mean between bin above and bin below
2698}
2699
2700////////////////////////////////////////////////////////////////////////////////
2701
2702std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2703{
2704 Data()->SetCurrentType(Types::kTesting);
2705 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
2706 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2707
2708 purity.push_back(resMulticlass->GetAchievablePur());
2709 return resMulticlass->GetAchievableEff();
2710}
2711
2712////////////////////////////////////////////////////////////////////////////////
2713
2714std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2715{
2716 Data()->SetCurrentType(Types::kTraining);
2717 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMulticlass));
2718 if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2719
2720 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2721 for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2722 resMulticlass->GetBestMultiClassCuts(icls);
2723 }
2724
2725 purity.push_back(resMulticlass->GetAchievablePur());
2726 return resMulticlass->GetAchievableEff();
2727}
2728
2729////////////////////////////////////////////////////////////////////////////////
2730/// Construct a confusion matrix for a multiclass classifier. The confusion
2731/// matrix compares, in turn, each class agaist all other classes in a pair-wise
2732/// fashion. In rows with index \f$ k_r = 0 ... K \f$, \f$ k_r \f$ is
2733/// considered signal for the sake of comparison and for each column
2734/// \f$ k_c = 0 ... K \f$ the corresponding class is considered background.
2735///
2736/// Note that the diagonal elements will be returned as NaN since this will
2737/// compare a class against itself.
2738///
2739/// \see TMVA::ResultsMulticlass::GetConfusionMatrix
2740///
2741/// \param[in] effB The background efficiency for which to evaluate.
2742/// \param[in] type The data set on which to evaluate (training, testing ...).
2743///
2744/// \return A matrix containing signal efficiencies for the given background
2745/// efficiency. The diagonal elements are NaN since this measure is
2746/// meaningless (comparing a class against itself).
2747///
2748
2750{
2751 if (GetAnalysisType() != Types::kMulticlass) {
2752 Log() << kFATAL << "Cannot get confusion matrix for non-multiclass analysis." << std::endl;
2753 return TMatrixD(0, 0);
2754 }
2755
2756 Data()->SetCurrentType(type);
2758 dynamic_cast<ResultsMulticlass *>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
2759
2760 if (resMulticlass == nullptr) {
2761 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2762 << "unable to create pointer in GetMulticlassEfficiency, exiting." << Endl;
2763 return TMatrixD(0, 0);
2764 }
2765
2766 return resMulticlass->GetConfusionMatrix(effB);
2767}
2768
2769////////////////////////////////////////////////////////////////////////////////
2770/// compute significance of mean difference
2771/// \f[
2772/// significance = \frac{|<S> - <B>|}{\sqrt{RMS_{S2} + RMS_{B2}}}
2773/// \f]
2774
2776{
2777 Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2778
2779 return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2780}
2781
2782////////////////////////////////////////////////////////////////////////////////
2783/// compute "separation" defined as
2784/// \f[
2785/// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2786/// \f]
2787
2792
2793////////////////////////////////////////////////////////////////////////////////
2794/// compute "separation" defined as
2795/// \f[
2796/// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2797/// \f]
2798
2800{
2801 // note, if zero pointers given, use internal pdf
2802 // sanity check first
2803 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2804 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2805 if (!pdfS) pdfS = fSplS;
2806 if (!pdfB) pdfB = fSplB;
2807
2808 if (!fSplS || !fSplB) {
2809 Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2810 << " fSplS or fSplB are not yet filled" << Endl;
2811 return 0;
2812 }else{
2813 return gTools().GetSeparation( *pdfS, *pdfB );
2814 }
2815}
2816
2817////////////////////////////////////////////////////////////////////////////////
2818/// calculate the area (integral) under the ROC curve as a
2819/// overall quality measure of the classification
2820
2822{
2823 // note, if zero pointers given, use internal pdf
2824 // sanity check first
2825 if ((!histS && histB) || (histS && !histB))
2826 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2827
2828 if (histS==0 || histB==0) return 0.;
2829
2830 TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2831 TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2832
2833
2834 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2835 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2836
2837 Double_t integral = 0;
2838 UInt_t nsteps = 1000;
2839 Double_t step = (xmax-xmin)/Double_t(nsteps);
2840 Double_t cut = xmin;
2841 for (UInt_t i=0; i<nsteps; i++) {
2842 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2843 cut+=step;
2844 }
2845 delete pdfS;
2846 delete pdfB;
2847 return integral*step;
2848}
2849
2850
2851////////////////////////////////////////////////////////////////////////////////
2852/// calculate the area (integral) under the ROC curve as a
2853/// overall quality measure of the classification
2854
2856{
2857 // note, if zero pointers given, use internal pdf
2858 // sanity check first
2859 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2860 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2861 if (!pdfS) pdfS = fSplS;
2862 if (!pdfB) pdfB = fSplB;
2863
2864 if (pdfS==0 || pdfB==0) return 0.;
2865
2866 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2867 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2868
2869 Double_t integral = 0;
2870 UInt_t nsteps = 1000;
2871 Double_t step = (xmax-xmin)/Double_t(nsteps);
2872 Double_t cut = xmin;
2873 for (UInt_t i=0; i<nsteps; i++) {
2874 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2875 cut+=step;
2876 }
2877 return integral*step;
2878}
2879
2880////////////////////////////////////////////////////////////////////////////////
2881/// plot significance, \f$ \frac{S}{\sqrt{S^2 + B^2}} \f$, curve for given number
2882/// of signal and background events; returns cut for maximum significance
2883/// also returned via reference is the maximum significance
2884
2888{
2889 Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, Types::kMaxAnalysisType );
2890
2892 Double_t effS(0),effB(0),significance(0);
2893 TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2894
2895 if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2896 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2897 << "Number of signal or background events is <= 0 ==> abort"
2898 << Endl;
2899 }
2900
2901 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2903
2904 TH1* eff_s = results->GetHist("MVA_EFF_S");
2905 TH1* eff_b = results->GetHist("MVA_EFF_B");
2906
2907 if ( (eff_s==0) || (eff_b==0) ) {
2908 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2909 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2910 return 0;
2911 }
2912
2913 for (Int_t bin=1; bin<=fNbinsH; bin++) {
2914 effS = eff_s->GetBinContent( bin );
2915 effB = eff_b->GetBinContent( bin );
2916
2917 // put significance into a histogram
2919
2920 temp_histogram->SetBinContent(bin,significance);
2921 }
2922
2923 // find maximum in histogram
2924 max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
2925 max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
2926
2927 // delete
2928 delete temp_histogram;
2929
2930 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
2931 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
2932
2933 return max_significance;
2934}
2935
2936////////////////////////////////////////////////////////////////////////////////
2937/// calculates rms,mean, xmin, xmax of the event variable
2938/// this can be either done for the variables as they are or for
2939/// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
2940
2945{
2946 Types::ETreeType previousTreeType = Data()->GetCurrentType();
2947 Data()->SetCurrentType(treeType);
2948
2949 Long64_t entries = Data()->GetNEvents();
2950
2951 // sanity check
2952 if (entries <=0)
2953 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;
2954
2955 // index of the wanted variable
2956 UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
2957
2958 // first fill signal and background in arrays before analysis
2959 xmin = +DBL_MAX;
2960 xmax = -DBL_MAX;
2961
2962 // take into account event weights
2963 meanS = 0;
2964 meanB = 0;
2965 rmsS = 0;
2966 rmsB = 0;
2967 Double_t sumwS = 0, sumwB = 0;
2968
2969 // loop over all training events
2970 for (Int_t ievt = 0; ievt < entries; ievt++) {
2971
2972 const Event* ev = GetEvent(ievt);
2973
2974 Double_t theVar = ev->GetValue(varIndex);
2975 Double_t weight = ev->GetWeight();
2976
2977 if (DataInfo().IsSignal(ev)) {
2978 sumwS += weight;
2979 meanS += weight*theVar;
2980 rmsS += weight*theVar*theVar;
2981 }
2982 else {
2983 sumwB += weight;
2984 meanB += weight*theVar;
2985 rmsB += weight*theVar*theVar;
2986 }
2987 xmin = TMath::Min( xmin, theVar );
2988 xmax = TMath::Max( xmax, theVar );
2989 }
2990
2991 meanS = meanS/sumwS;
2992 meanB = meanB/sumwB;
2995
2996 Data()->SetCurrentType(previousTreeType);
2997}
2998
2999////////////////////////////////////////////////////////////////////////////////
3000/// create reader class for method (classification only at present)
3001
3003{
3004 // the default consists of
3006 if (theClassFileName == "")
3007 classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
3008 else
3010
3011 TString className = TString("Read") + GetMethodName();
3012
3014 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
3015 << "Creating standalone class: "
3016 << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
3017
3018 std::ofstream fout( classFileName );
3019 if (!fout.good()) { // file could not be opened --> Error
3020 Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
3021 }
3022
3023 // now create the class
3024 // preamble
3025 fout << "// Class: " << className << std::endl;
3026 fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
3027
3028 // print general information and configuration state
3029 fout << std::endl;
3030 fout << "/* configuration options =====================================================" << std::endl << std::endl;
3031 WriteStateToStream( fout );
3032 fout << std::endl;
3033 fout << "============================================================================ */" << std::endl;
3034
3035 // generate the class
3036 fout << "" << std::endl;
3037 fout << "#include <array>" << std::endl;
3038 fout << "#include <vector>" << std::endl;
3039 fout << "#include <cmath>" << std::endl;
3040 fout << "#include <string>" << std::endl;
3041 fout << "#include <iostream>" << std::endl;
3042 fout << "" << std::endl;
3043 // now if the classifier needs to write some additional classes for its response implementation
3044 // this code goes here: (at least the header declarations need to come before the main class
3045 this->MakeClassSpecificHeader( fout, className );
3046
3047 fout << "#ifndef IClassifierReader__def" << std::endl;
3048 fout << "#define IClassifierReader__def" << std::endl;
3049 fout << std::endl;
3050 fout << "class IClassifierReader {" << std::endl;
3051 fout << std::endl;
3052 fout << " public:" << std::endl;
3053 fout << std::endl;
3054 fout << " // constructor" << std::endl;
3055 fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
3056 fout << " virtual ~IClassifierReader() {}" << std::endl;
3057 fout << std::endl;
3058 fout << " // return classifier response" << std::endl;
3059 if(GetAnalysisType() == Types::kMulticlass) {
3060 fout << " virtual std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3061 } else {
3062 fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3063 }
3064 fout << std::endl;
3065 fout << " // returns classifier status" << std::endl;
3066 fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3067 fout << std::endl;
3068 fout << " protected:" << std::endl;
3069 fout << std::endl;
3070 fout << " bool fStatusIsClean;" << std::endl;
3071 fout << "};" << std::endl;
3072 fout << std::endl;
3073 fout << "#endif" << std::endl;
3074 fout << std::endl;
3075 fout << "class " << className << " : public IClassifierReader {" << std::endl;
3076 fout << std::endl;
3077 fout << " public:" << std::endl;
3078 fout << std::endl;
3079 fout << " // constructor" << std::endl;
3080 fout << " " << className << "( std::vector<std::string>& theInputVars )" << std::endl;
3081 fout << " : IClassifierReader()," << std::endl;
3082 fout << " fClassName( \"" << className << "\" )," << std::endl;
3083 fout << " fNvars( " << GetNvar() << " )" << std::endl;
3084 fout << " {" << std::endl;
3085 fout << " // the training input variables" << std::endl;
3086 fout << " const char* inputVars[] = { ";
3087 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3088 fout << "\"" << GetOriginalVarName(ivar) << "\"";
3089 if (ivar<GetNvar()-1) fout << ", ";
3090 }
3091 fout << " };" << std::endl;
3092 fout << std::endl;
3093 fout << " // sanity checks" << std::endl;
3094 fout << " if (theInputVars.size() <= 0) {" << std::endl;
3095 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3096 fout << " fStatusIsClean = false;" << std::endl;
3097 fout << " }" << std::endl;
3098 fout << std::endl;
3099 fout << " if (theInputVars.size() != fNvars) {" << std::endl;
3100 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3101 fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3102 fout << " fStatusIsClean = false;" << std::endl;
3103 fout << " }" << std::endl;
3104 fout << std::endl;
3105 fout << " // validate input variables" << std::endl;
3106 fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3107 fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3108 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3109 fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3110 fout << " fStatusIsClean = false;" << std::endl;
3111 fout << " }" << std::endl;
3112 fout << " }" << std::endl;
3113 fout << std::endl;
3114 fout << " // initialize min and max vectors (for normalisation)" << std::endl;
3115 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3116 fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
3117 fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
3118 }
3119 fout << std::endl;
3120 fout << " // initialize input variable types" << std::endl;
3121 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3122 fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3123 }
3124 fout << std::endl;
3125 fout << " // initialize constants" << std::endl;
3126 fout << " Initialize();" << std::endl;
3127 fout << std::endl;
3128 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3129 fout << " // initialize transformation" << std::endl;
3130 fout << " InitTransform();" << std::endl;
3131 }
3132 fout << " }" << std::endl;
3133 fout << std::endl;
3134 fout << " // destructor" << std::endl;
3135 fout << " virtual ~" << className << "() {" << std::endl;
3136 fout << " Clear(); // method-specific" << std::endl;
3137 fout << " }" << std::endl;
3138 fout << std::endl;
3139 fout << " // the classifier response" << std::endl;
3140 fout << " // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
3141 fout << " // variables given to the constructor" << std::endl;
3142 if(GetAnalysisType() == Types::kMulticlass) {
3143 fout << " std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const override;" << std::endl;
3144 } else {
3145 fout << " double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
3146 }
3147 fout << std::endl;
3148 fout << " private:" << std::endl;
3149 fout << std::endl;
3150 fout << " // method-specific destructor" << std::endl;
3151 fout << " void Clear();" << std::endl;
3152 fout << std::endl;
3153 if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3154 fout << " // input variable transformation" << std::endl;
3155 GetTransformationHandler().MakeFunction(fout, className,1);
3156 fout << " void InitTransform();" << std::endl;
3157 fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3158 fout << std::endl;
3159 }
3160 fout << " // common member variables" << std::endl;
3161 fout << " const char* fClassName;" << std::endl;
3162 fout << std::endl;
3163 fout << " const size_t fNvars;" << std::endl;
3164 fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
3165 fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3166 fout << std::endl;
3167 fout << " // normalisation of input variables" << std::endl;
3168 fout << " double fVmin[" << GetNvar() << "];" << std::endl;
3169 fout << " double fVmax[" << GetNvar() << "];" << std::endl;
3170 fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3171 fout << " // normalise to output range: [-1, 1]" << std::endl;
3172 fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3173 fout << " }" << std::endl;
3174 fout << std::endl;
3175 fout << " // type of input variable: 'F' or 'I'" << std::endl;
3176 fout << " char fType[" << GetNvar() << "];" << std::endl;
3177 fout << std::endl;
3178 fout << " // initialize internal variables" << std::endl;
3179 fout << " void Initialize();" << std::endl;
3180 if(GetAnalysisType() == Types::kMulticlass) {
3181 fout << " std::vector<double> GetMulticlassValues__( const std::vector<double>& inputValues ) const;" << std::endl;
3182 } else {
3183 fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3184 }
3185 fout << "" << std::endl;
3186 fout << " // private members (method specific)" << std::endl;
3187
3188 // call the classifier specific output (the classifier must close the class !)
3189 MakeClassSpecific( fout, className );
3190
3191 if(GetAnalysisType() == Types::kMulticlass) {
3192 fout << "inline std::vector<double> " << className << "::GetMulticlassValues( const std::vector<double>& inputValues ) const" << std::endl;
3193 } else {
3194 fout << "inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3195 }
3196 fout << "{" << std::endl;
3197 fout << " // classifier response value" << std::endl;
3198 if(GetAnalysisType() == Types::kMulticlass) {
3199 fout << " std::vector<double> retval;" << std::endl;
3200 } else {
3201 fout << " double retval = 0;" << std::endl;
3202 }
3203 fout << std::endl;
3204 fout << " // classifier response, sanity check first" << std::endl;
3205 fout << " if (!IsStatusClean()) {" << std::endl;
3206 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3207 fout << " << \" because status is dirty\" << std::endl;" << std::endl;
3208 fout << " }" << std::endl;
3209 fout << " else {" << std::endl;
3210 if (IsNormalised()) {
3211 fout << " // normalise variables" << std::endl;
3212 fout << " std::vector<double> iV;" << std::endl;
3213 fout << " iV.reserve(inputValues.size());" << std::endl;
3214 fout << " int ivar = 0;" << std::endl;
3215 fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3216 fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3217 fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3218 fout << " }" << std::endl;
3219 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3220 GetMethodType() != Types::kHMatrix) {
3221 fout << " Transform( iV, -1 );" << std::endl;
3222 }
3223
3224 if(GetAnalysisType() == Types::kMulticlass) {
3225 fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3226 } else {
3227 fout << " retval = GetMvaValue__( iV );" << std::endl;
3228 }
3229 } else {
3230 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3231 GetMethodType() != Types::kHMatrix) {
3232 fout << " std::vector<double> iV(inputValues);" << std::endl;
3233 fout << " Transform( iV, -1 );" << std::endl;
3234 if(GetAnalysisType() == Types::kMulticlass) {
3235 fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3236 } else {
3237 fout << " retval = GetMvaValue__( iV );" << std::endl;
3238 }
3239 } else {
3240 if(GetAnalysisType() == Types::kMulticlass) {
3241 fout << " retval = GetMulticlassValues__( inputValues );" << std::endl;
3242 } else {
3243 fout << " retval = GetMvaValue__( inputValues );" << std::endl;
3244 }
3245 }
3246 }
3247 fout << " }" << std::endl;
3248 fout << std::endl;
3249 fout << " return retval;" << std::endl;
3250 fout << "}" << std::endl;
3251
3252 // create output for transformation - if any
3253 if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3254 GetTransformationHandler().MakeFunction(fout, className,2);
3255
3256 // close the file
3257 fout.close();
3258}
3259
3260////////////////////////////////////////////////////////////////////////////////
3261/// prints out method-specific help method
3262
3264{
3265 // if options are written to reference file, also append help info
3266 std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3267 std::ofstream* o = 0;
3268 if (gConfig().WriteOptionsReference()) {
3269 Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3270 o = new std::ofstream( GetReferenceFile(), std::ios::app );
3271 if (!o->good()) { // file could not be opened --> Error
3272 Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3273 }
3274 std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3275 }
3276
3277 // "|--------------------------------------------------------------|"
3278 if (!o) {
3279 Log() << kINFO << Endl;
3280 Log() << gTools().Color("bold")
3281 << "================================================================"
3282 << gTools().Color( "reset" )
3283 << Endl;
3284 Log() << gTools().Color("bold")
3285 << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3286 << gTools().Color( "reset" )
3287 << Endl;
3288 }
3289 else {
3290 Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3291 }
3292
3293 // print method-specific help message
3294 GetHelpMessage();
3295
3296 if (!o) {
3297 Log() << Endl;
3298 Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3299 Log() << gTools().Color("bold")
3300 << "================================================================"
3301 << gTools().Color( "reset" )
3302 << Endl;
3303 Log() << Endl;
3304 }
3305 else {
3306 // indicate END
3307 Log() << "# End of Message___" << Endl;
3308 }
3309
3310 std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3311 if (o) o->close();
3312}
3313
3314// ----------------------- r o o t f i n d i n g ----------------------------
3315
3316////////////////////////////////////////////////////////////////////////////////
3317/// returns efficiency as function of cut
3318
3320{
3321 Double_t retval=0;
3322
3323 // retrieve the class object
3325 retval = fSplRefS->Eval( theCut );
3326 }
3327 else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3328
3329 // caution: here we take some "forbidden" action to hide a problem:
3330 // in some cases, in particular for likelihood, the binned efficiency distributions
3331 // do not equal 1, at xmin, and 0 at xmax; of course, in principle we have the
3332 // unbinned information available in the trees, but the unbinned minimization is
3333 // too slow, and we don't need to do a precision measurement here. Hence, we force
3334 // this property.
3335 Double_t eps = 1.0e-5;
3336 if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3337 else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3338
3339 return retval;
3340}
3341
3342////////////////////////////////////////////////////////////////////////////////
3343/// returns the event collection (i.e. the dataset) TRANSFORMED using the
3344/// classifiers specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3345
3347{
3348 // if there's no variable transformation for this classifier, just hand back the
3349 // event collection of the data set
3350 if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3351 return (Data()->GetEventCollection(type));
3352 }
3353
3354 // otherwise, transform ALL the events and hand back the vector of the pointers to the
3355 // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3356 // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3357 Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3358 if (fEventCollections.at(idx) == 0) {
3359 fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3360 fEventCollections.at(idx) = GetTransformationHandler().CalcTransformations(*(fEventCollections.at(idx)),kTRUE);
3361 }
3362 return *(fEventCollections.at(idx));
3363}
3364
3365////////////////////////////////////////////////////////////////////////////////
3366/// calculates the TMVA version string from the training version code on the fly
3367
3369{
3370 UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3371 UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3372 UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3373
3374 return TString::Format("%i.%i.%i",a,b,c);
3375}
3376
3377////////////////////////////////////////////////////////////////////////////////
3378/// calculates the ROOT version string from the training version code on the fly
3379
3381{
3382 UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3383 UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3384 UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3385
3386 return TString::Format("%i.%02i/%02i",a,b,c);
3387}
3388
3389////////////////////////////////////////////////////////////////////////////////
3390
3393 ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
3394
3395 if (mvaRes != NULL) {
3396 TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3397 TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3398 TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3399 TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3400
3401 if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3402
3403 if (SorB == 's' || SorB == 'S')
3404 return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3405 else
3406 return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3407 }
3408 return -1;
3409}
const Bool_t Use_Splines_for_Eff_
const Int_t NBIN_HIST_HIGH
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
#define s1(x)
Definition RSha256.hxx:91
#define ROOT_VERSION_CODE
Definition RVersion.hxx:24
bool Bool_t
Boolean (0=false, 1=true) (bool)
Definition RtypesCore.h:77
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
char Char_t
Character 1 byte (char)
Definition RtypesCore.h:51
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char y2
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char y1
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
TMatrixT< Double_t > TMatrixD
Definition TMatrixDfwd.h:23
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2495
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
#define TMVA_VERSION_CODE
Definition Version.h:47
const_iterator begin() const
const_iterator end() const
Class to manage histogram axis.
Definition TAxis.h:32
Double_t GetXmax() const
Definition TAxis.h:142
Double_t GetXmin() const
Definition TAxis.h:141
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130...
Definition TDatime.h:37
Describe directory structure in memory.
Definition TDirectory.h:45
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:131
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3764
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:927
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:879
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition TH1.cxx:7571
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition TH1.cxx:1263
virtual Int_t GetQuantiles(Int_t n, Double_t *xp, const Double_t *p=nullptr)
Compute Quantiles for this histogram.
Definition TH1.cxx:4603
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition TH1.cxx:741
2-D histogram with a float per channel (see TH1 documentation)
Definition TH2.h:307
Int_t Fill(Double_t) override
Invalid Fill method.
Definition TH2.cxx:356
A doubly linked list.
Definition TList.h:38
Class that contains all the information of a class.
Definition ClassInfo.h:49
TString fWeightFileExtension
Definition Config.h:125
VariablePlotting & GetVariablePlotting()
Definition Config.h:97
class TMVA::Config::VariablePlotting fVariablePlotting
IONames & GetIONames()
Definition Config.h:98
MsgLogger * fLogger
! message logger
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
Definition DataSet.h:58
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:399
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:408
Interface for all concrete MVA method implementations.
Definition IMethod.h:53
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
IPythonInteractive()
standard constructor
~IPythonInteractive()
standard destructor
void ClearGraphs()
This function sets the point number to 0 for all graphs.
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Virtual base Class for all MVA method.
Definition MethodBase.h:111
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
void PrintHelpMessage() const override
prints out method-specific help method
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
const char * GetName() const override
Definition MethodBase.h:334
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
void SetWeightFileDir(TString fileDir)
set directory of weight file
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables,...
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual Double_t GetSignificance() const
compute significance of mean difference
virtual Double_t GetProba(const Event *ev)
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual std::vector< Double_t > GetDataMvaValues(DataSet *data=nullptr, Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the given Data type
void SetupMethod()
setup of methods
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
virtual ~MethodBase()
destructor
void WriteMonitoringHistosToFile() const override
write special monitoring histograms to file dummy implementation here --------------—
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance, , curve for given number of signal and background events; returns cut for maximum ...
virtual Double_t GetTrainingEfficiency(const TString &)
void SetWeightFileName(TString)
set the weight file name (depreciated)
TString GetWeightFileName() const
retrieve weight file name
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void InitBase()
default initialization called by all constructors
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
void MakeClass(const TString &classFileName=TString("")) const override
create reader class for method (classification only at present)
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
void ReadStateFromFile()
Function to write options and weights to file.
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream.
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
void SetTestvarName(const TString &v="")
Definition MethodBase.h:341
void ReadVariablesFromXML(void *varnode)
read variable info from XML
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables,...
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity:
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument This is just a dummy .
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
void AddVarsXMLTo(void *parent) const
write variable info to XML
Double_t GetMvaValue(Double_t *errLower=nullptr, Double_t *errUpper=nullptr) override=0
void AddTargetsXMLTo(void *parent) const
write target info to XML
void ReadTargetsFromXML(void *tarnode)
read target info from XML
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
void ReadStateFromXML(void *parent)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
void SetSource(const std::string &source)
Definition MsgLogger.h:68
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
@ kSpline3
Definition PDF.h:70
@ kSpline2
Definition PDF.h:70
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Definition Results.h:57
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition RootFinder.h:48
Linear interpolation of TGraph.
Definition TSpline1.h:43
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition Tools.cxx:202
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition Tools.cxx:401
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as
Definition Tools.cxx:121
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition Tools.cxx:589
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:828
TXMLEngine & xmlengine()
Definition Tools.h:262
Bool_t CheckSplines(const TH1 *, const TSpline *)
check quality of splining by comparing splines and histograms in each bin
Definition Tools.cxx:479
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1150
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition Tools.cxx:383
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1124
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1162
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Definition Types.h:135
@ kBackground
Definition Types.h:136
@ kLikelihood
Definition Types.h:79
@ kHMatrix
Definition Types.h:81
@ kMulticlass
Definition Types.h:129
@ kNoAnalysisType
Definition Types.h:130
@ kClassification
Definition Types.h:127
@ kMaxAnalysisType
Definition Types.h:131
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
Linear interpolation class.
Gaussian Transformation of input variables.
Class for type info of MVA input variable.
Linear interpolation class.
Linear interpolation class.
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition TMultiGraph.h:34
Collectable string class.
Definition TObjString.h:28
Basic string class.
Definition TString.h:138
void ToLower()
Change string to lower-case.
Definition TString.cxx:1189
Int_t Atoi() const
Return integer value of string.
Definition TString.cxx:1994
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition TString.cxx:1170
const char * Data() const
Definition TString.h:384
@ kLeading
Definition TString.h:284
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:659
virtual const char * GetBuildNode() const
Return the build node name.
Definition TSystem.cxx:3931
virtual int mkdir(const char *name, Bool_t recursive=kFALSE)
Make a file system directory.
Definition TSystem.cxx:916
virtual const char * WorkingDirectory()
Return working directory.
Definition TSystem.cxx:881
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition TSystem.cxx:1612
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file if layout<=0, no any spaces or newlines will be placed between xmlnode...
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
TLine * line
Double_t x[n]
Definition legend1.C:17
TH1F * h1
Definition legend1.C:5
Config & gConfig()
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:251
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:673
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:199
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:124