Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodBase.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodBase *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21 * *
22 * Copyright (c) 2005-2011: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * U. of Bonn, Germany *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (see tmva/doc/LICENSE) *
31 * *
32 **********************************************************************************/
33
34/*! \class TMVA::MethodBase
35\ingroup TMVA
36
37 Virtual base class for all MVA methods
38
39 MethodBase hosts several specific evaluation methods.
40
41 The kind of MVA that provides optimal performance in an analysis strongly
42 depends on the particular application. The evaluation factory provides a
43 number of numerical benchmark results to directly assess the performance
44 of the MVA training on the independent test sample. These are:
45
46 - The _signal efficiency_ at three representative background efficiencies
47 (which is 1 &minus; rejection).
48 - The _significance_ of an MVA estimator, defined by the difference
49 between the MVA mean values for signal and background, divided by the
50 quadratic sum of their root mean squares.
51 - The _separation_ of an MVA _x_, defined by the integral
52 \f[
53 \frac{1}{2} \int \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx
54 \f]
55 where
56 \f$ S(x) \f$ and \f$ B(x) \f$ are the signal and background distributions,
57 respectively. The separation is zero for identical signal and background MVA
58 shapes, and it is one for disjunctive shapes.
59 - The average, \f$ \int x \mu (S(x)) dx \f$, of the signal \f$ \mu_{transform} \f$.
60 The \f$ \mu_{transform} \f$ of an MVA denotes the transformation that yields
61 a uniform background distribution. In this way, the signal distributions
62 \f$ S(x) \f$ can be directly compared among the various MVAs. The stronger
63 \f$ S(x) \f$ peaks towards one, the better is the discrimination of the MVA.
64 The \f$ \mu_{transform} \f$ is
65 [documented here](http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html).
66
67 The MVA standard output also prints the linear correlation coefficients between
68 signal and background, which can be useful to eliminate variables that exhibit too
69 strong correlations.
70*/
71
72#include "TMVA/MethodBase.h"
73
74#include "TMVA/Config.h"
75#include "TMVA/Configurable.h"
76#include "TMVA/DataSetInfo.h"
77#include "TMVA/DataSet.h"
78#include "TMVA/Factory.h"
79#include "TMVA/IMethod.h"
80#include "TMVA/MsgLogger.h"
81#include "TMVA/PDF.h"
82#include "TMVA/Ranking.h"
83#include "TMVA/DataLoader.h"
84#include "TMVA/Tools.h"
85#include "TMVA/Results.h"
89#include "TMVA/RootFinder.h"
90#include "TMVA/Timer.h"
91#include "TMVA/TSpline1.h"
92#include "TMVA/Types.h"
96#include "TMVA/VariableInfo.h"
100#include "TMVA/Version.h"
101
102#include "TROOT.h"
103#include "TSystem.h"
104#include "TObjString.h"
105#include "TQObject.h"
106#include "TSpline.h"
107#include "TMatrix.h"
108#include "TMath.h"
109#include "TH1F.h"
110#include "TH2F.h"
111#include "TFile.h"
112#include "TGraph.h"
113#include "TXMLEngine.h"
114
115#include <iomanip>
116#include <iostream>
117#include <fstream>
118#include <sstream>
119#include <cstdlib>
120#include <algorithm>
121#include <limits>
122
123
124
125using std::endl;
126using std::atof;
127
128//const Int_t MethodBase_MaxIterations_ = 200;
130
131//const Int_t NBIN_HIST_PLOT = 100;
132const Int_t NBIN_HIST_HIGH = 10000;
133
134#ifdef _WIN32
135/* Disable warning C4355: 'this' : used in base member initializer list */
136#pragma warning ( disable : 4355 )
137#endif
138
139
140#include "TMultiGraph.h"
141
142////////////////////////////////////////////////////////////////////////////////
143/// standard constructor
144
146{
   // Start with an empty graph set; Init() creates the TGraphs on demand and
   // fIndex tracks the next point slot to fill.
147 fNumGraphs = 0;
148 fIndex = 0;
149}
150
151////////////////////////////////////////////////////////////////////////////////
152/// standard destructor
154{
   // Deleting fMultiGraph presumably also deletes the TGraphs added to it in
   // Init() — confirm TMultiGraph ownership semantics before changing this.
155 if (fMultiGraph){
156 delete fMultiGraph;
157 fMultiGraph = nullptr;
158 }
159 return;
160}
161
162////////////////////////////////////////////////////////////////////////////////
163/// This function gets some title and it creates a TGraph for every title.
164/// It also sets up the style for every TGraph. All graphs are added to a single TMultiGraph.
165///
166/// \param[in] graphTitles vector of titles
167
169{
   // Guard: the graph set is built exactly once; re-initialisation is refused.
170 if (fNumGraphs!=0){
171 std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
172 return;
173 }
   // Start at color index 2 and step by 2 so successive graphs get distinct colors.
174 Int_t color = 2;
175 for(auto& title : graphTitles){
176 fGraphs.push_back( new TGraph() );
177 fGraphs.back()->SetTitle(title);
178 fGraphs.back()->SetName(title);
179 fGraphs.back()->SetFillColor(color);
180 fGraphs.back()->SetLineColor(color);
181 fGraphs.back()->SetMarkerColor(color);
   // Each graph is also registered with the single TMultiGraph for display.
182 fMultiGraph->Add(fGraphs.back());
183 color += 2;
184 fNumGraphs += 1;
185 }
186 return;
187}
188
189////////////////////////////////////////////////////////////////////////////////
190/// This function sets the point number to 0 for all graphs.
191
193{
   // Reset every graph to zero points; the TGraph objects themselves are kept
   // so monitoring can be restarted without re-running Init().
194 for(Int_t i=0; i<fNumGraphs; i++){
195 fGraphs[i]->Set(0);
196 }
197}
198
199////////////////////////////////////////////////////////////////////////////////
200/// This function is used only in 2 TGraph case, and it will add new data points to graphs.
201///
202/// \param[in] x the x coordinate
203/// \param[in] y1 the y coordinate for the first TGraph
204/// \param[in] y2 the y coordinate for the second TGraph
205
207{
   // Grow both graphs by one point and write the new coordinates at slot fIndex.
208 fGraphs[0]->Set(fIndex+1);
209 fGraphs[1]->Set(fIndex+1);
210 fGraphs[0]->SetPoint(fIndex, x, y1);
211 fGraphs[1]->SetPoint(fIndex, x, y2);
212 fIndex++;
213 return;
214}
215
216////////////////////////////////////////////////////////////////////////////////
217/// This function can add data points to as many TGraphs as we have.
218///
219/// \param[in] dat vector of data points. The dat[0] contains the x coordinate,
220/// dat[1] contains the y coordinate for first TGraph, dat[2] for second, ...
221
222void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
223{
224 for(Int_t i=0; i<fNumGraphs;i++){
225 fGraphs[i]->Set(fIndex+1);
226 fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
227 }
228 fIndex++;
229 return;
230}
231
232
233////////////////////////////////////////////////////////////////////////////////
234/// standard constructor
235
238 const TString& methodTitle,
240 const TString& theOption) :
241 IMethod(),
243 fTmpEvent ( 0 ),
244 fRanking ( 0 ),
245 fInputVars ( 0 ),
246 fAnalysisType ( Types::kNoAnalysisType ),
247 fRegressionReturnVal ( 0 ),
248 fMulticlassReturnVal ( 0 ),
249 fDataSetInfo ( dsi ),
250 fSignalReferenceCut ( 0.5 ),
251 fSignalReferenceCutOrientation( 1. ),
252 fVariableTransformType ( Types::kSignal ),
253 fJobName ( jobName ),
254 fMethodName ( methodTitle ),
255 fMethodType ( methodType ),
256 fTestvar ( "" ),
257 fTMVATrainingVersion ( TMVA_VERSION_CODE ),
258 fROOTTrainingVersion ( ROOT_VERSION_CODE ),
259 fConstructedFromWeightFile ( kFALSE ),
260 fBaseDir ( 0 ),
261 fMethodBaseDir ( 0 ),
262 fFile ( 0 ),
263 fSilentFile (kFALSE),
264 fModelPersistence (kTRUE),
265 fWeightFile ( "" ),
266 fEffS ( 0 ),
267 fDefaultPDF ( 0 ),
268 fMVAPdfS ( 0 ),
269 fMVAPdfB ( 0 ),
270 fSplS ( 0 ),
271 fSplB ( 0 ),
272 fSpleffBvsS ( 0 ),
273 fSplTrainS ( 0 ),
274 fSplTrainB ( 0 ),
275 fSplTrainEffBvsS ( 0 ),
276 fVarTransformString ( "None" ),
277 fTransformationPointer ( 0 ),
278 fTransformation ( dsi, methodTitle ),
279 fVerbose ( kFALSE ),
280 fVerbosityLevelString ( "Default" ),
281 fHelp ( kFALSE ),
282 fHasMVAPdfs ( kFALSE ),
283 fIgnoreNegWeightsInTraining( kFALSE ),
284 fSignalClass ( 0 ),
285 fBackgroundClass ( 0 ),
286 fSplRefS ( 0 ),
287 fSplRefB ( 0 ),
288 fSplTrainRefS ( 0 ),
289 fSplTrainRefB ( 0 ),
290 fSetupCompleted (kFALSE)
291{
294
295// // default extension for weight files
296}
297
298////////////////////////////////////////////////////////////////////////////////
299/// constructor used for Testing + Application of the MVA,
300/// only (no training), using given WeightFiles
301
304 const TString& weightFile ) :
305 IMethod(),
306 Configurable(""),
307 fTmpEvent ( 0 ),
308 fRanking ( 0 ),
309 fInputVars ( 0 ),
310 fAnalysisType ( Types::kNoAnalysisType ),
311 fRegressionReturnVal ( 0 ),
312 fMulticlassReturnVal ( 0 ),
313 fDataSetInfo ( dsi ),
314 fSignalReferenceCut ( 0.5 ),
315 fVariableTransformType ( Types::kSignal ),
316 fJobName ( "" ),
317 fMethodName ( "MethodBase" ),
318 fMethodType ( methodType ),
319 fTestvar ( "" ),
320 fTMVATrainingVersion ( 0 ),
321 fROOTTrainingVersion ( 0 ),
322 fConstructedFromWeightFile ( kTRUE ),
323 fBaseDir ( 0 ),
324 fMethodBaseDir ( 0 ),
325 fFile ( 0 ),
326 fSilentFile (kFALSE),
327 fModelPersistence (kTRUE),
328 fWeightFile ( weightFile ),
329 fEffS ( 0 ),
330 fDefaultPDF ( 0 ),
331 fMVAPdfS ( 0 ),
332 fMVAPdfB ( 0 ),
333 fSplS ( 0 ),
334 fSplB ( 0 ),
335 fSpleffBvsS ( 0 ),
336 fSplTrainS ( 0 ),
337 fSplTrainB ( 0 ),
338 fSplTrainEffBvsS ( 0 ),
339 fVarTransformString ( "None" ),
340 fTransformationPointer ( 0 ),
341 fTransformation ( dsi, "" ),
342 fVerbose ( kFALSE ),
343 fVerbosityLevelString ( "Default" ),
344 fHelp ( kFALSE ),
345 fHasMVAPdfs ( kFALSE ),
346 fIgnoreNegWeightsInTraining( kFALSE ),
347 fSignalClass ( 0 ),
348 fBackgroundClass ( 0 ),
349 fSplRefS ( 0 ),
350 fSplRefB ( 0 ),
351 fSplTrainRefS ( 0 ),
352 fSplTrainRefB ( 0 ),
353 fSetupCompleted (kFALSE)
354{
356// // constructor used for Testing + Application of the MVA,
357// // only (no training), using given WeightFiles
358}
359
360////////////////////////////////////////////////////////////////////////////////
361/// destructor
362
364{
365 // destructor
366 if (!fSetupCompleted) Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
367
368 // input variable list and ranking are owned by this method
369 if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
370 if (fRanking != 0) delete fRanking;
371
372 // PDFs
373 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
374 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
375 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
376
377 // Splines
378 if (fSplS) { delete fSplS; fSplS = 0; }
379 if (fSplB) { delete fSplB; fSplB = 0; }
380 if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
381 if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
382 if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
383 if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
384 if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
385 if (fSplTrainEffBvsS) { delete fSplTrainEffBvsS; fSplTrainEffBvsS = 0; }
386
   // Deep-delete the cached event collections: first the events inside each
   // collection, then the collection vector itself.
387 for (size_t i = 0; i < fEventCollections.size(); i++ ) {
388 if (fEventCollections.at(i)) {
389 for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
390 it != fEventCollections.at(i)->end(); ++it) {
391 delete (*it);
392 }
393 delete fEventCollections.at(i);
394 fEventCollections.at(i) = nullptr;
395 }
396 }
397
398 if (fRegressionReturnVal) delete fRegressionReturnVal;
399 if (fMulticlassReturnVal) delete fMulticlassReturnVal;
400}
401
402////////////////////////////////////////////////////////////////////////////////
403/// setup of methods
404
406{
407 // setup of methods
408
   // SetupMethod must run exactly once per method instance.
409 if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
   // Base-class initialisation and options first, then the derived method's own
   // Init()/DeclareOptions() overrides.
410 InitBase();
411 DeclareBaseOptions();
412 Init();
413 DeclareOptions();
414 fSetupCompleted = kTRUE;
415}
416
417////////////////////////////////////////////////////////////////////////////////
418/// process all options
419/// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
420/// (sometimes, eg, fitters are used which can only be implemented during training phase)
421
423{
   // Decode base options (PDFs, transformations, verbosity) first, then the
   // method-specific options of the derived class.
424 ProcessBaseOptions();
425 ProcessOptions();
426}
427
428////////////////////////////////////////////////////////////////////////////////
429/// check may be overridden by derived class
430/// (sometimes, eg, fitters are used which can only be implemented during training phase)
431
433{
   // Report options that were set in the option string but never consumed.
434 CheckForUnusedOptions();
435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// default initialization called by all constructors
439
441{
442 SetConfigDescription( "Configuration options for classifier architecture and tuning" );
443
   // Histogram binning taken from the global TMVA configuration.
444 fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
445 fNbinsMVAoutput = gConfig().fVariablePlotting.fNbinsMVAoutput;
446 fNbinsH = NBIN_HIST_HIGH;
447
448 fSplTrainS = 0;
449 fSplTrainB = 0;
450 fSplTrainEffBvsS = 0;
   // -1 marks "not yet computed" for the cached distribution moments and times.
451 fMeanS = -1;
452 fMeanB = -1;
453 fRmsS = -1;
454 fRmsB = -1;
   // Extremes chosen so the first processed value updates both bounds.
455 fXmin = DBL_MAX;
456 fXmax = -DBL_MAX;
457 fTxtWeightsOnly = kTRUE;
458 fSplRefS = 0;
459 fSplRefB = 0;
460
461 fTrainTime = -1.;
462 fTestTime = -1.;
463
464 fRanking = 0;
465
466 // temporary until the move to DataSet is complete
467 fInputVars = new std::vector<TString>;
468 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
469 fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
470 }
471 fRegressionReturnVal = 0;
472 fMulticlassReturnVal = 0;
473
   // Two collection slots (presumably training/testing — see the destructor's
   // matching cleanup loop).
474 fEventCollections.resize( 2 );
475 fEventCollections.at(0) = 0;
476 fEventCollections.at(1) = 0;
477
478 // retrieve signal and background class index
479 if (DataInfo().GetClassInfo("Signal") != 0) {
480 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
481 }
482 if (DataInfo().GetClassInfo("Background") != 0) {
483 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
484 }
485
   // NOTE(review): this overrides the description set at the top of this
   // function — confirm which of the two strings is intended to survive.
486 SetConfigDescription( "Configuration options for MVA method" );
487 SetConfigName( TString("Method") + GetMethodTypeName() );
488}
489
490////////////////////////////////////////////////////////////////////////////////
491/// define the options (their key words) that can be set in the option string
492/// here the options valid for ALL MVA methods are declared.
493///
494/// know options:
495///
496/// - VariableTransform=None,Decorrelated,PCA to use transformed variables
497/// instead of the original ones
498/// - VariableTransformType=Signal,Background which decorrelation matrix to use
499/// in the method. Only the Likelihood
500/// Method can make proper use of independent
501/// transformations of signal and background
502/// - fNbinsMVAPdf = 50 Number of bins used to create a PDF of MVA
503/// - fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
504/// - fHasMVAPdfs create PDFs for the MVA outputs
505/// - V for Verbose output (!V) for non-verbose
506/// - H for Help message
507
509{
510 DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
511
512 DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
513 AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
514 AddPreDefVal( TString("Debug") );
515 AddPreDefVal( TString("Verbose") );
516 AddPreDefVal( TString("Info") );
517 AddPreDefVal( TString("Warning") );
518 AddPreDefVal( TString("Error") );
519 AddPreDefVal( TString("Fatal") );
520
521 // If True (default): write all training results (weights) as text files only;
522 // if False: write also in ROOT format (not available for all methods - will abort if not supported)
523 fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
524 fNormalise = kFALSE; // OBSOLETE !!!
525
526 DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
527
528 DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
529
530 DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
531
532 DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
533 "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
534}
535
536////////////////////////////////////////////////////////////////////////////////
537/// the option string is decoded, for available options see "DeclareOptions"
538
540{
541 if (HasMVAPdfs()) {
542 // setting the default bin num... maybe should be static ? ==> Please no static (JS)
543 // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
544 // reading every PDF's definition and passing the option string to the next one to be read and marked
   // The option string is threaded through the three PDFs in sequence so that
   // each PDF marks the options it consumed before handing the string on.
545 fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
546 fDefaultPDF->DeclareOptions();
547 fDefaultPDF->ParseOptions();
548 fDefaultPDF->ProcessOptions();
549 fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
550 fMVAPdfB->DeclareOptions();
551 fMVAPdfB->ParseOptions();
552 fMVAPdfB->ProcessOptions();
553 fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
554 fMVAPdfS->DeclareOptions();
555 fMVAPdfS->ParseOptions();
556 fMVAPdfS->ProcessOptions();
557
558 // the final marked option string is written back to the original methodbase
559 SetOptions( fMVAPdfS->GetOptions() );
560 }
561
562 TMVA::CreateVariableTransforms( fVarTransformString,
563 DataInfo(),
564 GetTransformationHandler(),
565 Log() );
566
   // Defensive cleanup: no PDF objects are kept when CreateMVAPdfs is off.
567 if (!HasMVAPdfs()) {
568 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
569 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
570 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
571 }
572
573 if (fVerbose) { // overwrites other settings
574 fVerbosityLevelString = TString("Verbose");
575 Log().SetMinType( kVERBOSE );
576 }
577 else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
578 else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
579 else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
580 else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
581 else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
582 else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
583 else if (fVerbosityLevelString != "Default" ) {
584 Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
585 << fVerbosityLevelString << "' unknown." << Endl;
586 }
587 Event::SetIgnoreNegWeightsInTraining(fIgnoreNegWeightsInTraining);
588}
589
590////////////////////////////////////////////////////////////////////////////////
591/// options that are used ONLY for the READER to ensure backward compatibility
592/// they are hence without any effect (the reader is only reading the training
593/// options that HAD been used at the training of the .xml weight file at hand
594
596{
   // These legacy options are declared only so old option strings / weight
   // files still parse in the reader; their values have no effect here.
597 DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
598 DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
   // Typo fixed in the user-visible help text: "both types of, course" -> "both types, of course".
599 DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
600 "Use signal or background events to derive for variable transformation (the transformation is applied on both types, of course)" );
601 AddPreDefVal( TString("Signal") );
602 AddPreDefVal( TString("Background") );
603 DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
604 // Why on earth ?? was this here? Was the verbosity level option meant to 'disappear? Not a good idea i think..
605 // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
606 // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
607 // AddPreDefVal( TString("Debug") );
608 // AddPreDefVal( TString("Verbose") );
609 // AddPreDefVal( TString("Info") );
610 // AddPreDefVal( TString("Warning") );
611 // AddPreDefVal( TString("Error") );
612 // AddPreDefVal( TString("Fatal") );
613 DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
614 DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
615}
616
617
618////////////////////////////////////////////////////////////////////////////////
619/// call the Optimizer with the set of parameters and ranges that
620/// are meant to be tuned.
621
622std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
623{
624 // this is just a dummy... needs to be implemented for each method
625 // individually (as long as we don't have it automatized via the
626 // configuration string
627
628 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
629 << GetName() << Endl;
630 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently we need to set hardcoded which parameter is tuned in which ranges"<<Endl;
631
632 return std::map<TString,Double_t>();
633}
634
635////////////////////////////////////////////////////////////////////////////////
636/// set the tuning parameters according to the argument
637/// This is just a dummy .. have a look at the MethodBDT how you could
638/// perhaps implement the same thing for the other Classifiers..
639
640void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
641{
   // Intentionally empty: derived methods (e.g. MethodBDT) override this to
   // apply the tuned parameter values.
642}
643
644////////////////////////////////////////////////////////////////////////////////
645
647{
   // Full training sequence: prepare the dataset, run the derived Train(),
   // create the per-event MVA outputs, and persist weights/monitoring output.
648 Data()->SetCurrentType(Types::kTraining);
649 Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
650
651 // train the MVA method
652 if (Help()) PrintHelpMessage();
653
654 // all histograms should be created in the method's subdirectory
655 if(!IsSilentFile()) BaseDir()->cd();
656
657 // once calculate all the transformation (e.g. the sequence of Decorr:Gauss:Decorr)
658 // needed for this classifier
659 GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());
660
661 // call training of derived MVA
662 Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
663 << "Begin training" << Endl;
664 Long64_t nEvents = Data()->GetNEvents();
665 Timer traintimer( nEvents, GetName(), kTRUE );
666 Train();
667 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
668 << "\tEnd of training " << Endl;
669 SetTrainTime(traintimer.ElapsedSeconds());
670 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
671 << "Elapsed time for training with " << nEvents << " events: "
672 << traintimer.GetElapsedTime() << " " << Endl;
673
   // This log line is deliberately left open (no Endl); the branch below
   // appends the analysis-type specific text to it.
674 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
675 << "\tCreate MVA output for ";
676
677 // create PDFs for the signal and background MVA distributions (if required)
678 if (DoMulticlass()) {
679 Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
680 AddMulticlassOutput(Types::kTraining);
681 }
682 else if (!DoRegression()) {
683
684 Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
685 AddClassifierOutput(Types::kTraining);
686 if (HasMVAPdfs()) {
687 CreateMVAPdfs();
688 AddClassifierOutputProb(Types::kTraining);
689 }
690
691 } else {
692
693 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
694 AddRegressionOutput( Types::kTraining );
695
696 if (HasMVAPdfs() ) {
697 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
698 CreateMVAPdfs();
699 }
700 }
701
702 // write the current MVA state into stream
703 // produced are one text file and one ROOT file
704 if (fModelPersistence ) WriteStateToFile();
705
706 // produce standalone make class (presently only supported for classification)
707 if ((!DoRegression()) && (fModelPersistence)) MakeClass();
708
709 // write additional monitoring histograms to main target file (not the weight file)
710 // again, make sure the histograms go into the method's subdirectory
711 if(!IsSilentFile())
712 {
713 BaseDir()->cd();
714 WriteMonitoringHistosToFile();
715 }
716}
717
718////////////////////////////////////////////////////////////////////////////////
719
721{
722 if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
723 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
   // NOTE(review): results are always fetched for Types::kTesting even though the
   // log message above reflects the 'type' argument — confirm this is intended.
724 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), Types::kTesting, Types::kRegression);
   // Untruncated quadratic-deviation histogram: sqrt of its mean is used as
   // the standard deviation of the regression error.
725 bool truncate = false;
726 TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
727 stddev = sqrt(h1->GetMean());
   // Recompute keeping only deviations below the 90% quantile of h1.
728 truncate = true;
729 Double_t yq[1], xq[]={0.9};
730 h1->GetQuantiles(1,yq,xq);
731 TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
732 stddev90Percent = sqrt(h2->GetMean());
733 delete h1;
734 delete h2;
735}
736
737////////////////////////////////////////////////////////////////////////////////
738/// Get all regression values in one call
740{
741 Long64_t nEvents = Data()->GetNEvents();
742 // use timer
743 Timer timer( nEvents, GetName(), kTRUE );
744
745 // Drawing the progress bar every event was causing a huge slowdown in the evaluation time
746 // So we set some parameters to draw the progress bar a total of totalProgressDraws, i.e. only draw every 1 in 100
747
748 Int_t totalProgressDraws = 100; // total number of times to update the progress bar
749 Int_t drawProgressEvery = 1; // draw every nth event such that we have a total of totalProgressDraws
   // NOTE(review): the statement scaling drawProgressEvery from
   // totalProgressDraws is not visible in this listing — confirm against the
   // original source that drawProgressEvery is set to ~nEvents/totalProgressDraws.
751
   // Flat output layout: event-major, ntargets consecutive floats per event.
752 size_t ntargets = Data()->GetEvent(0)->GetNTargets();
753 std::vector<float> output(nEvents*ntargets);
754 auto itr = output.begin();
755 for (Int_t ievt=0; ievt<nEvents; ievt++) {
756
757 Data()->SetCurrentEvent(ievt);
758 std::vector< Float_t > vals = GetRegressionValues();
759 if (vals.size() != ntargets)
760 Log() << kFATAL << "Output regression vector with size " << vals.size() << " is not consistent with target size of "
761 << ntargets << std::endl;
762
763 std::copy(vals.begin(), vals.end(), itr);
764 itr += vals.size();
765
766 // Only draw the progress bar once in a while, doing this every event causes the evaluation to be ridiculously slow
767 if(ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.DrawProgressBar( ievt );
768 }
769
770 return output;
771}
772
773////////////////////////////////////////////////////////////////////////////////
774/// prepare tree branch with the method's discriminating variable
775
777{
778 Data()->SetCurrentType(type);
779
780 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
781
782 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), type, Types::kRegression);
783
784 Long64_t nEvents = Data()->GetNEvents();
785
786 // use timer
787 Timer timer( nEvents, GetName(), kTRUE );
788
789 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
790 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
791
792 regRes->Resize( nEvents );
793
794 std::vector<float> output = GetAllRegressionValues();
795 // assume we have all number of targets for all events
796 Data()->SetCurrentEvent(0);
797 size_t nTargets = GetEvent()->GetNTargets();
798 auto regValuesBegin = output.begin();
   // NOTE(review): the definition of regValuesEnd (and the per-event advance of
   // both iterators inside the loop) sits on lines elided from this listing —
   // consult the original source before editing the loop below.
800
801 if (output.size() != nTargets * size_t(nEvents))
802 Log() << kFATAL << "Output regression vector with size " << output.size() << " is not consistent with target size of "
803 << nTargets << " and number of events " << nEvents << std::endl;
804
805
806 for (Int_t ievt=0; ievt<nEvents; ievt++) {
807
808 std::vector< Float_t > vals(regValuesBegin, regValuesEnd);
809 regRes->SetValue( vals, ievt );
810
813 }
814
815 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
816 << "Elapsed time for evaluation of " << nEvents << " events: "
817 << timer.GetElapsedTime() << " " << Endl;
818
819 // store time used for testing
821 SetTestTime(timer.ElapsedSeconds());
822
823 TString histNamePrefix(GetTestvarName());
824 histNamePrefix += (type==Types::kTraining?"train":"test");
825 regRes->CreateDeviationHistograms( histNamePrefix );
826}
827////////////////////////////////////////////////////////////////////////////////
828/// Get all multi-class values
830{
831 // use timer for progress bar
832
833 Long64_t nEvents = Data()->GetNEvents();
834 Timer timer( nEvents, GetName(), kTRUE );
835
   // +1 guarantees a non-zero modulus even for fewer than 100 events.
836 Int_t modulo = Int_t(nEvents/100) + 1;
837 // call first time to get number of classes
838 Data()->SetCurrentEvent(0);
839 std::vector< Float_t > vals = GetMulticlassValues();
   // Flat output layout: event-major, one block of class values per event.
   // Event 0 is copied outside the loop because its evaluation sized the buffer.
840 std::vector<float> output(nEvents * vals.size());
841 auto itr = output.begin();
842 std::copy(vals.begin(), vals.end(), itr);
843 for (Int_t ievt=1; ievt<nEvents; ievt++) {
844 itr += vals.size();
845 Data()->SetCurrentEvent(ievt);
846 vals = GetMulticlassValues();
847
848 std::copy(vals.begin(), vals.end(), itr);
849
850 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
851 }
852 return output;
853}
854////////////////////////////////////////////////////////////////////////////////
855/// prepare tree branch with the method's discriminating variable
856
858{
859 Data()->SetCurrentType(type);
860
861 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
862
863 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
864 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
865
866 Long64_t nEvents = Data()->GetNEvents();
867
868 // use timer
869 Timer timer( nEvents, GetName(), kTRUE );
870
871 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
872 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
873
874 resMulticlass->Resize( nEvents );
   // The flat buffer is event-major; the class count is recovered from its size.
875 std::vector<Float_t> output = GetAllMulticlassValues();
876 size_t nClasses = output.size()/nEvents;
877 for (Int_t ievt=0; ievt<nEvents; ievt++) {
878 std::vector< Float_t > vals(output.begin()+ievt*nClasses, output.begin()+(ievt+1)*nClasses);
879 resMulticlass->SetValue( vals, ievt );
880 }
881
882
883 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
884 << "Elapsed time for evaluation of " << nEvents << " events: "
885 << timer.GetElapsedTime() << " " << Endl;
886
887 // store time used for testing
889 SetTestTime(timer.ElapsedSeconds());
890
891 TString histNamePrefix(GetTestvarName());
892 histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
893
894 resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
895 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
896}
897
898////////////////////////////////////////////////////////////////////////////////
899
// NOTE(review): source line 900 (this function's signature — the default
// "no error calculation" helper taking two error pointers) is dropped by the
// scraped listing; verify against the original MethodBase.cxx.
// Flags both error estimates as unavailable (-1) when the method provides none.
901   if (err) *err=-1;
902   if (errUpper) *errUpper=-1;
903}
904
905////////////////////////////////////////////////////////////////////////////////
906
// NOTE(review): source line 907 (signature of the GetMvaValue overload taking an
// external Event*) is dropped by the scraped listing; verify against the original.
908   fTmpEvent = ev; // temporarily expose the external event to the parameterless GetMvaValue()
909   Double_t val = GetMvaValue(err, errUpper);
910   fTmpEvent = 0;  // always reset so later calls fall back to the current dataset event
911   return val;
912}
913
914////////////////////////////////////////////////////////////////////////////////
915/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
916/// for a quick determination if an event would be selected as signal or background
917
// NOTE(review): source line 918 (the IsSignalLike() signature) is dropped by the
// scraped listing. Multiplying both sides by the cut orientation folds the
// "above cut" and "below cut" selection conventions into a single comparison.
919   return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
920}
921////////////////////////////////////////////////////////////////////////////////
922/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
923/// for a quick determination if an event with this mva output value would be selected as signal or background
924
// NOTE(review): source line 925 (signature of the overload taking the MVA value
// directly) is dropped by the scraped listing; verify against the original.
926   return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
927}
928
929////////////////////////////////////////////////////////////////////////////////
930/// prepare tree branch with the method's discriminating variable
931
// NOTE(review): the scraped listing drops source lines 932 (this function's
// signature) and 936 (the start of the clRes declaration whose cast continues on
// the next line); verify against the original MethodBase.cxx.
933{
934   Data()->SetCurrentType(type);
935
937      (ResultsClassification*)Data()->GetResults(GetMethodName(), type, Types::kClassification );
938
939   Long64_t nEvents = Data()->GetNEvents();
940   clRes->Resize( nEvents );
941
942   // use timer
943   Timer timer( nEvents, GetName(), kTRUE );
944
945   Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
946         << "Evaluation of " << GetMethodName() << " on "
947         << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
948         << " sample (" << nEvents << " events)" << Endl;
949
   // evaluate all events in one pass (with progress logging enabled)
950   std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);
951
952   Log() << kINFO
953         << "Elapsed time for evaluation of " << nEvents << " events: "
954         << timer.GetElapsedTime() << " " << Endl;
955
956   // store time used for testing
958   SetTestTime(timer.ElapsedSeconds());
959
960   // load mva values and type to results object
961   for (Int_t ievt = 0; ievt < nEvents; ievt++) {
962      // note we do not need the transformed event to get the signal/background information
963      // by calling Data()->GetEvent instead of this->GetEvent we access the untransformed one
964      auto ev = Data()->GetEvent(ievt);
965      clRes->SetValue(mvaValues[ievt], ievt, DataInfo().IsSignal(ev));
966   }
967}
968
969////////////////////////////////////////////////////////////////////////////////
970/// get all the MVA values for the events of the current Data type
// NOTE(review): source line 971 (this function's signature, taking firstEvt,
// lastEvt and a logProgress flag) is dropped by the scraped listing.
972{
973
974   Long64_t nEvents = Data()->GetNEvents();
   // clamp the requested range to [0, nEvents]
975   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
976   if (firstEvt < 0) firstEvt = 0;
977   std::vector<Double_t> values(lastEvt-firstEvt);
978   // log in case of looping on all the events
979   nEvents = values.size();
980
981   // use timer
982   Timer timer( nEvents, GetName(), kTRUE );
983
984   if (logProgress)
985      Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
986            << "Evaluation of " << GetMethodName() << " on "
987            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
988            << " sample (" << nEvents << " events)" << Endl;
989
990   for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
991      Data()->SetCurrentEvent(ievt);
      // NOTE(review): 'values' has size lastEvt-firstEvt but is indexed with the
      // absolute event index; for firstEvt > 0 this writes out of bounds —
      // values[ievt-firstEvt] looks intended. Callers visible here pass firstEvt=0.
992      values[ievt] = GetMvaValue();
993
994      // print progress
995      if (logProgress) {
996         Int_t modulo = Int_t(nEvents/100);
997         if (modulo <= 0 ) modulo = 1;
998         if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
999      }
1000   }
1001   if (logProgress) {
1002     Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1003            << "Elapsed time for evaluation of " << nEvents << " events: "
1004            << timer.GetElapsedTime() << " " << Endl;
1005   }
1006
1007   return values;
1008}
1009
1010////////////////////////////////////////////////////////////////////////////////
1011/// get all the MVA values for the events of the given Data type
1012// (this is used by Method Category and it does not need to be re-implemented by derived classes )
// NOTE(review): source line 1013 (this overload's signature, taking a DataSet*
// plus the event range) is dropped by the scraped listing.
1014{
   // temporarily point the method at the supplied dataset, delegate, then restore
1015   fTmpData = data;
1016   auto result = GetMvaValues(firstEvt, lastEvt, logProgress);
1017   fTmpData = nullptr;
1018   return result;
1019}
1020
1021////////////////////////////////////////////////////////////////////////////////
1022/// prepare tree branch with the method's discriminating variable
1023
// NOTE(review): the scraped listing drops source lines 1024 (this function's
// signature) and 1028 (the start of the mvaProb declaration continued below);
// verify against the original MethodBase.cxx.
1025{
1026   Data()->SetCurrentType(type);
1027
1029      (ResultsClassification*)Data()->GetResults(TString("prob_")+GetMethodName(), type, Types::kClassification );
1030
1031   Long64_t nEvents = Data()->GetNEvents();
1032
1033   // use timer
1034   Timer timer( nEvents, GetName(), kTRUE );
1035
1036   Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
1037         << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
1038
1039   mvaProb->Resize( nEvents );
1040   Int_t modulo = Int_t(nEvents/100);
1041   if (modulo <= 0 ) modulo = 1;
1042   for (Int_t ievt=0; ievt<nEvents; ievt++) {
1043
1044      Data()->SetCurrentEvent(ievt);
      // signal probability from the method's PDFs, 0.5 prior signal fraction
1045      Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
      // NOTE(review): a negative probability aborts the whole loop, leaving the
      // remaining entries of mvaProb unfilled — confirm this is intentional.
1046      if (proba < 0) break;
1047      mvaProb->SetValue( proba, ievt, DataInfo().IsSignal( Data()->GetEvent()) );
1048
1049      // print progress
1050      if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
1051   }
1052
1053   Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
1054         << "Elapsed time for evaluation of " << nEvents << " events: "
1055         << timer.GetElapsedTime() << " " << Endl;
1056}
1057
1058////////////////////////////////////////////////////////////////////////////////
1059/// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
1060///
1061/// - bias = average deviation
1062/// - dev = average absolute deviation
1063/// - rms = rms of deviation
1064
// NOTE(review): the scraped listing drops several source lines in this function:
// 1065-1068 and 1070 (the multi-line signature taking the bias/dev/rms output
// references and the tree type), 1092 (the regression estimate 'r' read from
// 'output'), 1096-1097 (the xmin/xmax updates), 1126, 1156 and 1158 (the final
// rms/rmsT/mInfT assignments). Verify against the original MethodBase.cxx.
1069                                          Double_t& corr,
1071{
   // remember the current tree type so it can be restored on exit
1072   Types::ETreeType savedType = Data()->GetCurrentType();
1073   Data()->SetCurrentType(type);
1074
1075   bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
1076   Double_t sumw = 0;
1077   Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
1078   const Int_t nevt = GetNEvents();
1079   Float_t* rV = new Float_t[nevt];
1080   Float_t* tV = new Float_t[nevt];
1081   Float_t* wV = new Float_t[nevt];
1082   Float_t xmin = 1e30, xmax = -1e30;
1083   Log() << kINFO << "Calculate regression for all events" << Endl;
1084   Timer timer( nevt, GetName(), kTRUE );
1085   Long64_t modulo = Long64_t(nevt / 100) + 1;
1086   auto output = GetAllRegressionValues();
1087   int ntargets = Data()->GetEvent(0)->GetNTargets();
1088   for (Long64_t ievt=0; ievt<nevt; ievt++) {
1089      const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
1090      Float_t t = ev->GetTarget(0);
1091      Float_t w = ev->GetWeight();
1093      Float_t d = (r-t);
1094
1095      // find min/max
1098
1099      // store for truncated RMS computation
1100      rV[ievt] = r;
1101      tV[ievt] = t;
1102      wV[ievt] = w;
1103
1104      // compute deviation-squared
1105      sumw += w;
1106      bias += w * d;
1107      dev  += w * TMath::Abs(d);
1108      rms  += w * d * d;
1109
1110      // compute correlation between target and regression estimate
1111      m1  += t*w; s1 += t*t*w;
1112      m2  += r*w; s2 += r*r*w;
      // NOTE(review): unlike m1/s1/m2/s2, s12 is accumulated WITHOUT the event
      // weight, yet is divided by sumw below — confirm whether 't*r*w' was intended.
1113      s12 += t*r;
1114      // print progress
1115      if (ievt % modulo == 0)
1116         timer.DrawProgressBar(ievt);
1117   }
1118   timer.DrawProgressBar(nevt - 1);
1119   Log() << kINFO << "Elapsed time for evaluation of " << nevt << " events: "
1120         << timer.GetElapsedTime() << " " << Endl;
1121
1122   // standard quantities
1123   bias /= sumw;
1124   dev  /= sumw;
1125   rms  /= sumw;
1127
1128   // correlation
1129   m1   /= sumw;
1130   m2   /= sumw;
1131   corr  = s12/sumw - m1*m2;
1132   corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1133
1134   // create histogram required for computation of mutual information
1135   TH2F* hist  = new TH2F( "hist",  "hist",  150, xmin, xmax, 100, xmin, xmax );
1136   TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );
1137
1138   // compute truncated RMS and fill histogram
   // "truncated" = restricted to deviations within +-2 rms around the bias
1139   Double_t devMax = bias + 2*rms;
1140   Double_t devMin = bias - 2*rms;
1141   sumw = 0;
1142   for (Long64_t ievt=0; ievt<nevt; ievt++) {
1143      Float_t d = (rV[ievt] - tV[ievt]);
1144      hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
1145      if (d >= devMin && d <= devMax) {
1146         sumw  += wV[ievt];
1147         biasT += wV[ievt] * d;
1148         devT  += wV[ievt] * TMath::Abs(d);
1149         rmsT  += wV[ievt] * d * d;
1150         histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
1151      }
1152   }
1153   biasT /= sumw;
1154   devT  /= sumw;
1155   rmsT  /= sumw;
1157   mInf  = gTools().GetMutualInformation( *hist  );
1159
1160   delete hist;
1161   delete histT;
1162
1163   delete [] rV;
1164   delete [] tV;
1165   delete [] wV;
1166
1167   Data()->SetCurrentType(savedType);
1168}
1169
1170
1171////////////////////////////////////////////////////////////////////////////////
1172/// test multiclass classification
1173
// NOTE(review): the scraped listing drops source lines 1174 (this function's
// signature) and 1187-1188 (the declarations of histNamePrefixTest /
// histNamePrefixTrain used below); verify against the original MethodBase.cxx.
1175{
1176   ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
1177   if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1178
1179   // GA evaluation of best cut for sig eff * sig pur. Slow, disabled for now.
1180   // Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test
1181   // data..." << Endl; for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1182   //    resMulticlass->GetBestMultiClassCuts(icls);
1183   // }
1184
1185   // Create histograms for use in TMVA GUI
1186   TString histNamePrefix(GetTestvarName());
1189
1190   resMulticlass->CreateMulticlassHistos(histNamePrefixTest, fNbinsMVAoutput, fNbinsH);
1191   resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTest);
1192
1193   resMulticlass->CreateMulticlassHistos(histNamePrefixTrain, fNbinsMVAoutput, fNbinsH);
1194   resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTrain);
1195}
1196
1197
1198////////////////////////////////////////////////////////////////////////////////
1199/// initialization
1200
// NOTE(review): the scraped listing drops source lines 1201 (this function's
// signature), 1205 (the start of the mvaRes declaration continued below), 1235
// (the TestvarName declaration), 1280 (the start of the mvaProb declaration),
// and 1325-1328 (the remaining NormHist calls for the rarity and efficiency
// histograms). Verify against the original MethodBase.cxx.
1202{
1203   Data()->SetCurrentType(Types::kTesting);
1204
1206      ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
1207
1208   // sanity checks: tree must exist, and theVar must be in tree
1209   if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
1210      Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
1211           << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
1212      Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
1213            << " not found in tree" << Endl;
1214   }
1215
1216   // basic statistics operations are made in base class
1217   gTools().ComputeStat( GetEventCollection(Types::kTesting), mvaRes->GetValueVector(),
1218                         fMeanS, fMeanB, fRmsS, fRmsB, fXmin, fXmax, fSignalClass );
1219
1220   // choose reasonable histogram ranges, by removing outliers
1221   Double_t nrms = 10;
1222   fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
1223   fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );
1224
1225   // determine cut orientation
   // signal above background mean => select events above the cut, and vice versa
1226   fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;
1227
1228   // fill 2 types of histograms for the various analyses
1229   // this one is for actual plotting
1230
   // small offset so the upper edge value still lands in the last bin
1231   Double_t sxmax = fXmax+0.00001;
1232
1233   // classifier response distributions for training sample
1234   // MVA plots used for graphics representation (signal)
1236   if(IsSilentFile()) {
1237      TestvarName = TString::Format("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1238   } else {
1239      TestvarName=GetTestvarName();
1240   }
1241   TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
1242   TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
1243   mvaRes->Store(mva_s, "MVA_S");
1244   mvaRes->Store(mva_b, "MVA_B");
1245   mva_s->Sumw2();
1246   mva_b->Sumw2();
1247
1248   TH1* proba_s = 0;
1249   TH1* proba_b = 0;
1250   TH1* rarity_s = 0;
1251   TH1* rarity_b = 0;
1252   if (HasMVAPdfs()) {
1253      // P(MVA) plots used for graphics representation
1254      proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1255      proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1256      mvaRes->Store(proba_s, "Prob_S");
1257      mvaRes->Store(proba_b, "Prob_B");
1258      proba_s->Sumw2();
1259      proba_b->Sumw2();
1260
1261      // R(MVA) plots used for graphics representation
1262      rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1263      rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1264      mvaRes->Store(rarity_s, "Rar_S");
1265      mvaRes->Store(rarity_b, "Rar_B");
1266      rarity_s->Sumw2();
1267      rarity_b->Sumw2();
1268   }
1269
1270   // MVA plots used for efficiency calculations (large number of bins)
1271   TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
1272   TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
1273   mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
1274   mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
1275   mva_eff_s->Sumw2();
1276   mva_eff_b->Sumw2();
1277
1278   // fill the histograms
1279
1281      (Data()->GetResults( TString("prob_")+GetMethodName(), Types::kTesting, Types::kMaxAnalysisType ) );
1282
1283   Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
1284   if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
1285   //std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();
1286
1287   //LM: this is needed to avoid crashes in ROOCCURVE
1288   if ( mvaRes->GetSize() != GetNEvents() ) {
1289      Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
1290      assert(mvaRes->GetSize() == GetNEvents());
1291   }
1292
1293   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1294
1295      const Event* ev = GetEvent(ievt);
1296      Float_t v = (*mvaRes)[ievt][0];
1297      Float_t w = ev->GetWeight();
1298
1299      if (DataInfo().IsSignal(ev)) {
1300         //mvaResTypes->push_back(kTRUE);
1301         mva_s ->Fill( v, w );
1302         if (mvaProb) {
1303            proba_s->Fill( (*mvaProb)[ievt][0], w );
1304            rarity_s->Fill( GetRarity( v ), w );
1305         }
1306
1307         mva_eff_s ->Fill( v, w );
1308      }
1309      else {
1310         //mvaResTypes->push_back(kFALSE);
1311         mva_b ->Fill( v, w );
1312         if (mvaProb) {
1313            proba_b->Fill( (*mvaProb)[ievt][0], w );
1314            rarity_b->Fill( GetRarity( v ), w );
1315         }
1316         mva_eff_b ->Fill( v, w );
1317      }
1318   }
1319
1320   // uncomment those (and several others if you want unnormalized output
1321   gTools().NormHist( mva_s  );
1322   gTools().NormHist( mva_b  );
1323   gTools().NormHist( proba_s );
1324   gTools().NormHist( proba_b );
1329
1330   // create PDFs from histograms, using default splines, and no additional smoothing
1331   if (fSplS) { delete fSplS; fSplS = 0; }
1332   if (fSplB) { delete fSplB; fSplB = 0; }
1333   fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
1334   fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
1335}
1336
1337////////////////////////////////////////////////////////////////////////////////
1338/// general method used in writing the header of the weight files where
1339/// the used variables, variable transformation type etc. is specified
1340
1341void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1342{
1343   TString prefix = "";
// NOTE(review): source line 1344 is dropped by the scraped listing — it declares
// the 'userInfo' pointer used (and deleted) below; verify against the original.
1345
   // #GEN section: general bookkeeping (versions, user, host, date, sample size)
1346   tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1347   tf << prefix << "Method         : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1348   tf.setf(std::ios::left);
1349   tf << prefix << "TMVA Release   : " << std::setw(10) << GetTrainingTMVAVersionString() << "    ["
1350      << GetTrainingTMVAVersionCode() << "]" << std::endl;
1351   tf << prefix << "ROOT Release   : " << std::setw(10) << GetTrainingROOTVersionString() << "    ["
1352      << GetTrainingROOTVersionCode() << "]" << std::endl;
1353   tf << prefix << "Creator        : " << userInfo->fUser << std::endl;
1354   tf << prefix << "Date           : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1355   tf << prefix << "Host           : " << gSystem->GetBuildNode() << std::endl;
1356   tf << prefix << "Dir            : " << gSystem->WorkingDirectory() << std::endl;
1357   tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1358
1359   TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1360
1361   tf << prefix << "Analysis type  : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1362   tf << prefix << std::endl;
1363
1364   delete userInfo;
1365
1366   // First write all options
1367   tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1368   WriteOptionsToStream( tf, prefix );
1369   tf << prefix << std::endl;
1370
1371   // Second write variable info
1372   tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1373   WriteVarsToStream( tf, prefix );
1374   tf << prefix << std::endl;
1375}
1376
1377////////////////////////////////////////////////////////////////////////////////
1378/// xml writing
1379
1380void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1381{
1382 void* it = gTools().AddChild(gi,"Info");
1383 gTools().AddAttr(it,"name", name);
1384 gTools().AddAttr(it,"value", value);
1385}
1386
1387////////////////////////////////////////////////////////////////////////////////
1388
// NOTE(review): source line 1389 (this function's signature, dispatching on the
// analysis type) is dropped by the scraped listing; verify against the original.
// Routes result creation to the regression / multiclass / classification variant.
1390   if (analysisType == Types::kRegression) {
1391      AddRegressionOutput( type );
1392   } else if (analysisType == Types::kMulticlass) {
1393      AddMulticlassOutput( type );
1394   } else {
1395      AddClassifierOutput( type );
      // probability output only exists when the method produced MVA PDFs
1396      if (HasMVAPdfs())
1397         AddClassifierOutputProb( type );
1398   }
1399}
1400
1401////////////////////////////////////////////////////////////////////////////////
1402/// general method used in writing the header of the weight files where
1403/// the used variables, variable transformation type etc. is specified
1404
1405void TMVA::MethodBase::WriteStateToXML( void* parent ) const
1406{
1407   if (!parent) return;
1408
// NOTE(review): source line 1409 is dropped by the scraped listing — it declares
// the 'userInfo' pointer used (and deleted) below; verify against the original.
1410
   // general info: versions, user, environment, training statistics
1411   void* gi = gTools().AddChild(parent, "GeneralInfo");
1412   AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
1413   AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
1414   AddInfoItem( gi, "Creator", userInfo->fUser);
1415   TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
1416   AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
1417   AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
1418   AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
1419   AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));
1420
1421   Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
1422   TString analysisType((aType==Types::kRegression) ? "Regression" :
1423                        (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
1424   AddInfoItem( gi, "AnalysisType", analysisType );
1425   delete userInfo;
1426
1427   // write options
1428   AddOptionsXMLTo( parent );
1429
1430   // write variable info
1431   AddVarsXMLTo( parent );
1432
1433   // write spectator info
1434   if (fModelPersistence)
1435      AddSpectatorsXMLTo( parent );
1436
1437   // write class info if in multiclass mode
1438   AddClassesXMLTo(parent);
1439
1440   // write target info if in regression mode
1441   if (DoRegression()) AddTargetsXMLTo(parent);
1442
1443   // write transformations
1444   GetTransformationHandler(false).AddXMLTo( parent );
1445
1446   // write MVA variable distributions
1447   void* pdfs = gTools().AddChild(parent, "MVAPdfs");
1448   if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1449   if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1450
1451   // write weights
1452   AddWeightsXMLTo( parent );
1453}
1454
1455////////////////////////////////////////////////////////////////////////////////
1456/// write reference MVA distributions (and other information)
1457/// to a ROOT type weight file
1458
// NOTE(review): source line 1459 (this function's signature — the overload
// reading from a ROOT TFile) is dropped by the scraped listing; verify against
// the original MethodBase.cxx.
1460{
1461   TDirectory::TContext dirCtx{nullptr}; // Don't register histograms to current directory
1462   fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1463   fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1464
1465   ReadWeightsFromStream( rf );
1466
1467   SetTestvarName();
1468}
1469
1470////////////////////////////////////////////////////////////////////////////////
1471/// write options and weights to file
1472/// note that each one text file for the main configuration information
1473/// and one ROOT file for ROOT objects are created
1474
// NOTE(review): the scraped listing drops source lines 1475 (this function's
// signature) and 1490-1491 (the XML-engine calls that save and free 'doc');
// verify against the original MethodBase.cxx.
1476{
1477   // ---- create the text file
1478   TString tfname( GetWeightFileName() );
1479
1480   // writing xml file
1481   TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1482   Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1483         << "Creating xml weight file: "
1484         << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1485   void* doc      = gTools().xmlengine().NewDoc();
1486   void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1487   gTools().xmlengine().DocSetRootElement(doc,rootnode);
1488   gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1489   WriteStateToXML(rootnode);
1492}
1493
1494////////////////////////////////////////////////////////////////////////////////
1495/// Function to write options and weights to file
1496
// NOTE(review): the scraped listing drops source lines 1497 (this function's
// signature) and 1514 (freeing the parsed XML 'doc'); verify against the
// original MethodBase.cxx. Despite the doc line above, this READS the state.
1498{
1499   // get the filename
1500
1501   TString tfname(GetWeightFileName());
1502
1503   Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1504         << "Reading weight file: "
1505         << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1506
   // XML weight files and legacy text weight files take different paths
1507   if (tfname.EndsWith(".xml") ) {
1508      void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1509      if (!doc) {
1510         Log() << kFATAL << "Error parsing XML file " << tfname << Endl;
1511      }
1512      void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1513      ReadStateFromXML(rootnode);
1515   }
1516   else {
1517      std::filebuf fb;
1518      fb.open(tfname.Data(),std::ios::in);
1519      if (!fb.is_open()) { // file not found --> Error
1520         Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1521               << "Unable to open input weight file: " << tfname << Endl;
1522      }
1523      std::istream fin(&fb);
1524      ReadStateFromStream(fin);
1525      fb.close();
1526   }
1527   if (!fTxtWeightsOnly) {
1528      // ---- read the ROOT file
1529      TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1530      Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1531            << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1532      TFile* rfile = TFile::Open( rfname, "READ" );
1533      ReadStateFromStream( *rfile );
1534      rfile->Close();
1535   }
1536}
1537////////////////////////////////////////////////////////////////////////////////
1538/// for reading from memory
1539
// NOTE(review): the scraped listing drops source lines 1540 (this function's
// signature taking the in-memory XML string) and 1544 (freeing the parsed
// document); verify against the original MethodBase.cxx.
1541   void* doc      = gTools().xmlengine().ParseString(xmlstr);
1542   void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1543   ReadStateFromXML(rootnode);
1545
1546   return;
1547}
1548
1549////////////////////////////////////////////////////////////////////////////////
1550
// NOTE(review): the scraped listing drops several source lines in this function:
// 1551 (the signature taking the "MethodSetup" XML node), 1554 (the
// fullMethodName declaration), 1606 (advancing 'antypeNode' to the next child),
// 1630 (the pdfname declaration) and 1638 (advancing 'pdfnode' to the second
// PDF child). Verify against the original MethodBase.cxx.
1552{
1553
1555   gTools().ReadAttr( methodNode, "Method", fullMethodName );
1556
   // "Type::Name" --> keep only the part after "::"
1557   fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1558
1559   // update logger
1560   Log().SetSource( GetName() );
1561   Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1562         << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1563
1564   // after the method name is read, the testvar can be set
1565   SetTestvarName();
1566
   // walk all children of <MethodSetup> and dispatch on the node name
1567   TString nodeName("");
1568   void* ch = gTools().GetChild(methodNode);
1569   while (ch!=0) {
1570      nodeName = TString( gTools().GetName(ch) );
1571
1572      if (nodeName=="GeneralInfo") {
1573         // read analysis type
1574
1575         TString name(""),val("");
1576         void* antypeNode = gTools().GetChild(ch);
1577         while (antypeNode) {
1578            gTools().ReadAttr( antypeNode, "name", name );
1579
1580            if (name == "TrainingTime")
1581               gTools().ReadAttr( antypeNode, "value", fTrainTime );
1582
1583            if (name == "AnalysisType") {
1584               gTools().ReadAttr( antypeNode, "value", val );
1585               val.ToLower();
1586               if      (val == "regression" )     SetAnalysisType( Types::kRegression );
1587               else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1588               else if (val == "multiclass" )     SetAnalysisType( Types::kMulticlass );
1589               else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1590            }
1591
            // extract the version code from the "... [12345]" bracket suffix
1592            if (name == "TMVA Release" || name == "TMVA") {
1593               TString s;
1594               gTools().ReadAttr( antypeNode, "value", s);
1595               fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1596               Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1597            }
1598
1599            if (name == "ROOT Release" || name == "ROOT") {
1600               TString s;
1601               gTools().ReadAttr( antypeNode, "value", s);
1602               fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1603               Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1604                     << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1605            }
1607         }
1608      }
1609      else if (nodeName=="Options") {
1610         ReadOptionsFromXML(ch);
1611         ParseOptions();
1612
1613      }
1614      else if (nodeName=="Variables") {
1615         ReadVariablesFromXML(ch);
1616      }
1617      else if (nodeName=="Spectators") {
1618         ReadSpectatorsFromXML(ch);
1619      }
1620      else if (nodeName=="Classes") {
1621         if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1622      }
1623      else if (nodeName=="Targets") {
1624         if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1625      }
1626      else if (nodeName=="Transformations") {
1627         GetTransformationHandler().ReadFromXML(ch);
1628      }
1629      else if (nodeName=="MVAPdfs") {
1631         if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1632         if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1633         void* pdfnode = gTools().GetChild(ch);
1634         if (pdfnode) {
1635            gTools().ReadAttr(pdfnode, "Name", pdfname);
1636            fMVAPdfS = new PDF(pdfname);
1637            fMVAPdfS->ReadXML(pdfnode);
1639            gTools().ReadAttr(pdfnode, "Name", pdfname);
1640            fMVAPdfB = new PDF(pdfname);
1641            fMVAPdfB->ReadXML(pdfnode);
1642         }
1643      }
1644      else if (nodeName=="Weights") {
1645         ReadWeightsFromXML(ch);
1646      }
1647      else {
1648         Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1649      }
1650      ch = gTools().GetNextChild(ch);
1651
1652   }
1653
1654   // update transformation handler
1655   if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1656}
1657
1658////////////////////////////////////////////////////////////////////////////////
1659/// read the header from the weight files of the different MVA methods
1660
// NOTE(review): the scraped listing drops several source lines in this function:
// 1661 (the signature taking a std::istream&), 1676 (a methodType cleanup line),
// 1713 (the 'norm' declaration whose initializer continues on the next line) and
// 1717 (the varTrafo/varTrafo2 declarations used below). Verify against the
// original MethodBase.cxx.
1662{
1663   char buf[512];
1664
1665   // when reading from stream, we assume the files are produced with TMVA<=397
1666   SetAnalysisType(Types::kClassification);
1667
1668
1669   // first read the method name
1670   GetLine(fin,buf);
1671   while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1672   TString namestr(buf);
1673
1674   TString methodType = namestr(0,namestr.Index("::"));
1675   methodType = methodType(methodType.Last(' '),methodType.Length());
1677
1678   TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1679   methodName = methodName.Strip(TString::kLeading);
1680   if (methodName == "") methodName = methodType;
1681   fMethodName  = methodName;
1682
1683   Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1684
1685   // update logger
1686   Log().SetSource( GetName() );
1687
1688   // now the question is whether to read the variables first or the options (well, of course the order
1689   // of writing them needs to agree)
1690   //
1691   // the option "Decorrelation" is needed to decide if the variables we
1692   // read are decorrelated or not
1693   //
1694   // the variables are needed by some methods (TMLP) to build the NN
1695   // which is done in ProcessOptions so for the time being we first Read and Parse the options then
1696   // we read the variables, and then we process the options
1697
1698   // now read all options
1699   GetLine(fin,buf);
1700   while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1701   ReadOptionsFromStream(fin);
1702   ParseOptions();
1703
1704   // Now read variable info
1705   fin.getline(buf,512);
1706   while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1707   ReadVarsFromStream(fin);
1708
1709   // now we process the options (of the derived class)
1710   ProcessOptions();
1711
1712   if (IsNormalised()) {
1714         GetTransformationHandler().AddTransformation( new VariableNormalizeTransform(DataInfo()), -1 );
1715      norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1716   }
   // re-create the variable transformation chain announced by the options
1718   if ( fVarTransformString == "None") {
1719      if (fUseDecorr)
1720         varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1721   } else if ( fVarTransformString == "Decorrelate" ) {
1722      varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1723   } else if ( fVarTransformString == "PCA"  ) {
1724      varTrafo = GetTransformationHandler().AddTransformation( new VariablePCATransform(DataInfo()), -1 );
1725   } else if ( fVarTransformString == "Uniform" ) {
1726      varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1727   } else if ( fVarTransformString == "Gauss" ) {
1728      varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1729   } else if ( fVarTransformString == "GaussDecorr" ) {
1730      varTrafo  = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1731      varTrafo2 = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1732   } else {
1733      Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1734            << fVarTransformString << "' unknown." << Endl;
1735   }
1736   // Now read decorrelation matrix if available
1737   if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1738      fin.getline(buf,512);
1739      while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1740      if (varTrafo) {
1741         TString trafo(fVariableTransformTypeString); trafo.ToLower();
1742         varTrafo->ReadTransformationFromStream(fin, trafo );
1743      }
1744      if (varTrafo2) {
1745         TString trafo(fVariableTransformTypeString); trafo.ToLower();
1746         varTrafo2->ReadTransformationFromStream(fin, trafo );
1747      }
1748   }
1749
1750
1751   if (HasMVAPdfs()) {
1752      // Now read the MVA PDFs
1753      fin.getline(buf,512);
1754      while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1755      if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1756      if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1757      fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1758      fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1759      fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
1760      fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
1761
1762      fin >> *fMVAPdfS;
1763      fin >> *fMVAPdfB;
1764   }
1765
1766   // Now read weights
1767   fin.getline(buf,512);
1768   while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1769   fin.getline(buf,512);
1770   ReadWeightsFromStream( fin );
1771
1772   // update transformation handler
1773   if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1774
1775}
1776
1777////////////////////////////////////////////////////////////////////////////////
1778/// write the list of variables (name, min, max) for a given data
1779/// transformation method to the stream
1780
1781void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1782{
1783 o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1784 std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1785 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1786 o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1787 varIt = DataInfo().GetSpectatorInfos().begin();
1788 for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1789}
1790
1791////////////////////////////////////////////////////////////////////////////////
1792/// Read the variables (name, min, max) for a given data
1793/// transformation method from the stream. In the stream we only
1794/// expect the limits which will be set
1795
1797{
1798 TString dummy;
1800 istr >> dummy >> readNVar;
1801
1802 if (readNVar!=DataInfo().GetNVariables()) {
1803 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1804 << " while there are " << readNVar << " variables declared in the file"
1805 << Endl;
1806 }
1807
1808 // we want to make sure all variables are read in the order they are defined
1810 std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1811 int varIdx = 0;
1812 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
1813 varInfo.ReadFromStream(istr);
1814 if (varIt->GetExpression() == varInfo.GetExpression()) {
1815 varInfo.SetExternalLink((*varIt).GetExternalLink());
1816 (*varIt) = varInfo;
1817 }
1818 else {
1819 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1820 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1821 Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1822 Log() << kINFO << "the correct working of the method):" << Endl;
1823 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1824 Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1825 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1826 }
1827 }
1828}
1829
1830////////////////////////////////////////////////////////////////////////////////
1831/// write variable info to XML
1832
1833void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1834{
1835 void* vars = gTools().AddChild(parent, "Variables");
1836 gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1837
1838 for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1839 VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1840 void* var = gTools().AddChild( vars, "Variable" );
1841 gTools().AddAttr( var, "VarIndex", idx );
1842 vi.AddToXML( var );
1843 }
1844}
1845
1846////////////////////////////////////////////////////////////////////////////////
1847/// write spectator info to XML
1848
1850{
1851 void* specs = gTools().AddChild(parent, "Spectators");
1852
1853 UInt_t writeIdx=0;
1854 for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1855
1856 VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1857
1858 // we do not want to write spectators that are category-cuts,
1859 // except if the method is the category method and the spectators belong to it
1860 if (vi.GetVarType()=='C') continue;
1861
1862 void* spec = gTools().AddChild( specs, "Spectator" );
1863 gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1864 vi.AddToXML( spec );
1865 }
1866 gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1867}
1868
1869////////////////////////////////////////////////////////////////////////////////
1870/// write class info to XML
1871
1872void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1873{
1874 UInt_t nClasses=DataInfo().GetNClasses();
1875
1876 void* classes = gTools().AddChild(parent, "Classes");
1877 gTools().AddAttr( classes, "NClass", nClasses );
1878
1879 for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1880 ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1881 TString className =classInfo->GetName();
1882 UInt_t classNumber=classInfo->GetNumber();
1883
1884 void* classNode=gTools().AddChild(classes, "Class");
1885 gTools().AddAttr( classNode, "Name", className );
1886 gTools().AddAttr( classNode, "Index", classNumber );
1887 }
1888}
1889////////////////////////////////////////////////////////////////////////////////
1890/// write target info to XML
1891
1892void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1893{
1894 void* targets = gTools().AddChild(parent, "Targets");
1895 gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1896
1897 for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1898 VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1899 void* tar = gTools().AddChild( targets, "Target" );
1900 gTools().AddAttr( tar, "TargetIndex", idx );
1901 vi.AddToXML( tar );
1902 }
1903}
1904
1905////////////////////////////////////////////////////////////////////////////////
1906/// read variable info from XML
1907
1909{
1911 gTools().ReadAttr( varnode, "NVar", readNVar);
1912
1913 if (readNVar!=DataInfo().GetNVariables()) {
1914 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1915 << " while there are " << readNVar << " variables declared in the file"
1916 << Endl;
1917 }
1918
1919 // we want to make sure all variables are read in the order they are defined
1921 int varIdx = 0;
1922 void* ch = gTools().GetChild(varnode);
1923 while (ch) {
1924 gTools().ReadAttr( ch, "VarIndex", varIdx);
1925 existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1926 readVarInfo.ReadFromXML(ch);
1927
1928 if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1929 readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
1931 }
1932 else {
1933 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1934 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1935 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1936 Log() << kINFO << "correct working of the method):" << Endl;
1937 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1938 Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1939 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1940 }
1941 ch = gTools().GetNextChild(ch);
1942 }
1943}
1944
1945////////////////////////////////////////////////////////////////////////////////
1946/// read spectator info from XML
1947
1949{
1951 gTools().ReadAttr( specnode, "NSpec", readNSpec);
1952
1953 if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1954 Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1955 << " while there are " << readNSpec << " spectators declared in the file"
1956 << Endl;
1957 }
1958
1959 // we want to make sure all variables are read in the order they are defined
1961 int specIdx = 0;
1962 void* ch = gTools().GetChild(specnode);
1963 while (ch) {
1964 gTools().ReadAttr( ch, "SpecIndex", specIdx);
1965 existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1966 readSpecInfo.ReadFromXML(ch);
1967
1968 if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1969 readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
1971 }
1972 else {
1973 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1974 Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1975 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1976 Log() << kINFO << "correct working of the method):" << Endl;
1977 Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1978 Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1979 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1980 }
1981 ch = gTools().GetNextChild(ch);
1982 }
1983}
1984
1985////////////////////////////////////////////////////////////////////////////////
1986/// read number of classes from XML
1987
1989{
1991 // coverity[tainted_data_argument]
1992 gTools().ReadAttr( clsnode, "NClass", readNCls);
1993
1994 TString className="";
1996 void* ch = gTools().GetChild(clsnode);
1997 if (!ch) {
1998 for (UInt_t icls = 0; icls<readNCls;++icls) {
1999 TString classname = TString::Format("class%i",icls);
2000 DataInfo().AddClass(classname);
2001
2002 }
2003 }
2004 else{
2005 while (ch) {
2006 gTools().ReadAttr( ch, "Index", classIndex);
2007 gTools().ReadAttr( ch, "Name", className );
2008 DataInfo().AddClass(className);
2009
2010 ch = gTools().GetNextChild(ch);
2011 }
2012 }
2013
2014 // retrieve signal and background class index
2015 if (DataInfo().GetClassInfo("Signal") != 0) {
2016 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
2017 }
2018 else
2019 fSignalClass=0;
2020 if (DataInfo().GetClassInfo("Background") != 0) {
2021 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
2022 }
2023 else
2024 fBackgroundClass=1;
2025}
2026
2027////////////////////////////////////////////////////////////////////////////////
2028/// read target info from XML
2029
2031{
2033 gTools().ReadAttr( tarnode, "NTrgt", readNTar);
2034
2035 int tarIdx = 0;
2036 TString expression;
2037 void* ch = gTools().GetChild(tarnode);
2038 while (ch) {
2039 gTools().ReadAttr( ch, "TargetIndex", tarIdx);
2040 gTools().ReadAttr( ch, "Expression", expression);
2041 DataInfo().AddTarget(expression,"","",0,0);
2042
2043 ch = gTools().GetNextChild(ch);
2044 }
2045}
2046
2047////////////////////////////////////////////////////////////////////////////////
2048/// returns the ROOT directory where info/histograms etc of the
2049/// corresponding MVA method instance are stored
2050
2052{
2053 if (fBaseDir != 0) return fBaseDir;
2054 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
2055
2056 if (IsSilentFile()) {
2057 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2058 << "MethodBase::BaseDir() - No directory exists when running a Method without output file. Enable the "
2059 "output when creating the factory"
2060 << Endl;
2061 }
2062
2063 TDirectory* methodDir = MethodBaseDir();
2064 if (methodDir==0)
2065 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" << Endl;
2066
2067 TString defaultDir = GetMethodName();
2068 TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
2069 if(!sdir)
2070 {
2071 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet--> created it" <<Endl;
2072 sdir = methodDir->mkdir(defaultDir);
2073 sdir->cd();
2074 // write weight file name into target file
2075 if (fModelPersistence) {
2077 TObjString wfileName( GetWeightFileName() );
2078 wfilePath.Write( "TrainingPath" );
2079 wfileName.Write( "WeightFileName" );
2080 }
2081 }
2082
2083 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
2084 return sdir;
2085}
2086
2087////////////////////////////////////////////////////////////////////////////////
2088/// returns the ROOT directory where all instances of the
2089/// corresponding MVA method are stored
2090
2092{
2093 if (fMethodBaseDir != 0) {
2094 return fMethodBaseDir;
2095 }
2096
2097 const char *datasetName = DataInfo().GetName();
2098
2099 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodTypeName()
2100 << " not set yet --> check if already there.." << Endl;
2101
2102 TDirectory *factoryBaseDir = GetFile();
2103 if (!factoryBaseDir) return nullptr;
2104 fMethodBaseDir = factoryBaseDir->GetDirectory(datasetName);
2105 if (!fMethodBaseDir) {
2106 fMethodBaseDir = factoryBaseDir->mkdir(datasetName, TString::Format("Base directory for dataset %s", datasetName).Data());
2107 if (!fMethodBaseDir) {
2108 Log() << kFATAL << "Can not create dir " << datasetName;
2109 }
2110 }
2111 TString methodTypeDir = TString::Format("Method_%s", GetMethodTypeName().Data());
2112 fMethodBaseDir = fMethodBaseDir->GetDirectory(methodTypeDir.Data());
2113
2114 if (!fMethodBaseDir) {
2116 TString methodTypeDirHelpStr = TString::Format("Directory for all %s methods", GetMethodTypeName().Data());
2117 fMethodBaseDir = datasetDir->mkdir(methodTypeDir.Data(), methodTypeDirHelpStr);
2118 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodName()
2119 << " does not exist yet--> created it" << Endl;
2120 }
2121
2122 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName)
2123 << "Return from MethodBaseDir() after creating base directory " << Endl;
2124 return fMethodBaseDir;
2125}
2126
2127////////////////////////////////////////////////////////////////////////////////
2128/// set directory of weight file
2129
2131{
2132 fFileDir = fileDir;
2133 gSystem->mkdir( fFileDir, kTRUE );
2134}
2135
2136////////////////////////////////////////////////////////////////////////////////
2137/// set the weight file name (depreciated)
2138
2143
2144////////////////////////////////////////////////////////////////////////////////
2145/// retrieve weight file name
2146
2148{
2149 if (fWeightFile!="") return fWeightFile;
2150
2151 // the default consists of
2152 // directory/jobname_methodname_suffix.extension.{root/txt}
2153 TString suffix = "";
2154 TString wFileDir(GetWeightFileDir());
2155 TString wFileName = GetJobName() + "_" + GetMethodName() +
2156 suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml";
2157 if (wFileDir.IsNull() ) return wFileName;
2158 // add weight file directory of it is not null
2159 return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2160 + wFileName );
2161}
2162////////////////////////////////////////////////////////////////////////////////
2163/// writes all MVA evaluation histograms to file
2164
2166{
2167 BaseDir()->cd();
2168
2169
2170 // write MVA PDFs to file - if exist
2171 if (0 != fMVAPdfS) {
2172 fMVAPdfS->GetOriginalHist()->Write();
2173 fMVAPdfS->GetSmoothedHist()->Write();
2174 fMVAPdfS->GetPDFHist()->Write();
2175 }
2176 if (0 != fMVAPdfB) {
2177 fMVAPdfB->GetOriginalHist()->Write();
2178 fMVAPdfB->GetSmoothedHist()->Write();
2179 fMVAPdfB->GetPDFHist()->Write();
2180 }
2181
2182 // write result-histograms
2183 Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2184 if (!results)
2185 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2186 << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2187 << "/kMaxAnalysisType" << Endl;
2188 results->GetStorage()->Write();
2190 // skipping plotting of variables if too many (default is 200)
2191 if ((int) DataInfo().GetNVariables()< gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables)
2192 GetTransformationHandler().PlotVariables (GetEventCollection( Types::kTesting ), BaseDir() );
2193 else
2194 Log() << kINFO << TString::Format("Dataset[%s] : ",DataInfo().GetName())
2195 << " variable plots are not produces ! The number of variables is " << DataInfo().GetNVariables()
2196 << " , it is larger than " << gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables << Endl;
2197 }
2198}
2199
2200////////////////////////////////////////////////////////////////////////////////
2201/// write special monitoring histograms to file
2202/// dummy implementation here -----------------
2203
2207
2208////////////////////////////////////////////////////////////////////////////////
2209/// reads one line from the input stream
2210/// checks for certain keywords and interprets
2211/// the line if keywords are found
2212
2213Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2214{
2215 fin.getline(buf,512);
2216 TString line(buf);
2217 if (line.BeginsWith("TMVA Release")) {
2218 Ssiz_t start = line.First('[')+1;
2219 Ssiz_t length = line.Index("]",start)-start;
2220 TString code = line(start,length);
2221 std::stringstream s(code.Data());
2222 s >> fTMVATrainingVersion;
2223 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2224 }
2225 if (line.BeginsWith("ROOT Release")) {
2226 Ssiz_t start = line.First('[')+1;
2227 Ssiz_t length = line.Index("]",start)-start;
2228 TString code = line(start,length);
2229 std::stringstream s(code.Data());
2230 s >> fROOTTrainingVersion;
2231 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2232 }
2233 if (line.BeginsWith("Analysis type")) {
2234 Ssiz_t start = line.First('[')+1;
2235 Ssiz_t length = line.Index("]",start)-start;
2236 TString code = line(start,length);
2237 std::stringstream s(code.Data());
2238 std::string analysisType;
2239 s >> analysisType;
2240 if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2241 else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2242 else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2243 else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2244
2245 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2246 << (GetAnalysisType() == Types::kRegression ? "Regression" :
2247 (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2248 }
2249
2250 return true;
2251}
2252
2253////////////////////////////////////////////////////////////////////////////////
2254/// Create PDFs of the MVA output variables
2255
2257{
2258 Data()->SetCurrentType(Types::kTraining);
2259
2260 // the PDF's are stored as results ONLY if the corresponding "results" are booked,
2261 // otherwise they will be only used 'online'
2263 ( Data()->GetResults(GetMethodName(), Types::kTraining, Types::kClassification) );
2264
2265 if (mvaRes==0 || mvaRes->GetSize()==0) {
2266 Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2267 }
2268
2269 Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2270 Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2271
2272 // create histograms that serve as basis to create the MVA Pdfs
2273 TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2274 fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2275 TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2276 fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2277
2278
2279 // compute sum of weights properly
2280 histMVAPdfS->Sumw2();
2281 histMVAPdfB->Sumw2();
2282
2283 // fill histograms
2284 for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2285 Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2286 Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2287
2288 if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2289 else histMVAPdfB->Fill( theVal, theWeight );
2290 }
2291
2294
2295 // momentary hack for ROOT problem
2296 if(!IsSilentFile())
2297 {
2298 histMVAPdfS->Write();
2299 histMVAPdfB->Write();
2300 }
2301 // create PDFs
2302 fMVAPdfS->BuildPDF ( histMVAPdfS );
2303 fMVAPdfB->BuildPDF ( histMVAPdfB );
2304 fMVAPdfS->ValidatePDF( histMVAPdfS );
2305 fMVAPdfB->ValidatePDF( histMVAPdfB );
2306
2307 if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2308 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2309 << TString::Format( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2310 GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2311 << Endl;
2312 }
2313
2314 delete histMVAPdfS;
2315 delete histMVAPdfB;
2316}
2317
   // the simple one, automatically calculates the mvaVal and uses the
   // SAME sig/bkg ratio as given in the training sample (typically 50/50
   // .. (NormMode=EqualNumEvents) but can be different)
   if (!fMVAPdfS || !fMVAPdfB) {
      Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
      // lazily build the PDFs from the training-tree MVA response
      CreateMVAPdfs();
   }
   // prior signal fraction from the sums of training weights
   Double_t sigFraction = DataInfo().GetTrainingSumSignalWeights() / (DataInfo().GetTrainingSumSignalWeights() + DataInfo().GetTrainingSumBackgrWeights() );
   Double_t mvaVal = GetMvaValue(ev);

   // delegate to the (mvaVal, sigFraction) overload
   return GetProba(mvaVal,sigFraction);

}
2332////////////////////////////////////////////////////////////////////////////////
2333/// compute likelihood ratio
2334
2336{
2337 if (!fMVAPdfS || !fMVAPdfB) {
2338 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2339 return -1.0;
2340 }
2341 Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2342 Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2343
2344 Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2345
2346 return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2347}
2348
2349////////////////////////////////////////////////////////////////////////////////
2350/// compute rarity:
2351/// \f[
2352/// R(x) = \int_{[-\infty..x]} { PDF(x') dx' }
2353/// \f]
2354/// where PDF(x) is the PDF of the classifier's signal or background distribution
2355
2357{
2358 if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2359 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2360 << "select option \"CreateMVAPdfs\"" << Endl;
2361 return 0.0;
2362 }
2363
2364 PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2365
2366 return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2367}
2368
2369////////////////////////////////////////////////////////////////////////////////
2370/// fill background efficiency (resp. rejection) versus signal efficiency plots
2371/// returns signal efficiency at background efficiency indicated in theString
2372
2374{
2375 Data()->SetCurrentType(type);
2376 Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
2377 std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2378
2379 // parse input string for required background efficiency
2381
2382 // sanity check
2384 if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2385 else if (list->GetSize() > 2) {
2386 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2387 << " in string: " << theString
2388 << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2389 delete list;
2390 return -1;
2391 }
2392
2393 // sanity check
2394 if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2395 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2396 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2397 delete list;
2398 return -1.0;
2399 }
2400
2401 // create histograms
2402
2403 // first, get efficiency histograms for signal and background
2404 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2405 Double_t xmin = effhist->GetXaxis()->GetXmin();
2406 Double_t xmax = effhist->GetXaxis()->GetXmax();
2407
2409
2410 // first round ? --> create histograms
2411 if (results->DoesExist("MVA_EFF_S")==0) {
2412
2413 // for efficiency plot
2414 TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2415 TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2416 results->Store(eff_s, "MVA_EFF_S");
2417 results->Store(eff_b, "MVA_EFF_B");
2418
2419 // sign if cut
2420 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2421
2422 // this method is unbinned
2423 nevtS = 0;
2424 for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2425
2426 // read the tree
2427 Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2428 Float_t theWeight = GetEvent(ievt)->GetWeight();
2429 Float_t theVal = (*mvaRes)[ievt];
2430
2431 // select histogram depending on if sig or bgd
2433
2434 // count signal and background events in tree
2435 if (isSignal) nevtS+=theWeight;
2436
2437 TAxis* axis = theHist->GetXaxis();
2438 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2439 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2440 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2441 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2442 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2443
2444 if (sign > 0)
2445 for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2446 else if (sign < 0)
2447 for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2448 else
2449 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2450 }
2451
2452 // renormalise maximum to <=1
2453 // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2454 // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2455
2456 eff_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_s->GetMaximum()) );
2457 eff_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_b->GetMaximum()) );
2458
2459 // background efficiency versus signal efficiency
2460 TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2461 results->Store(eff_BvsS, "MVA_EFF_BvsS");
2462 eff_BvsS->SetXTitle( "Signal eff" );
2463 eff_BvsS->SetYTitle( "Backgr eff" );
2464
2465 // background rejection (=1-eff.) versus signal efficiency
2466 TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2467 results->Store(rej_BvsS);
2468 rej_BvsS->SetXTitle( "Signal eff" );
2469 rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2470
2471 // inverse background eff (1/eff.) versus signal efficiency
2472 TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2473 GetTestvarName(), fNbins, 0, 1 );
2474 results->Store(inveff_BvsS);
2475 inveff_BvsS->SetXTitle( "Signal eff" );
2476 inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2477
2478 // use root finder
2479 // spline background efficiency plot
2480 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2482 fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2483 fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2484
2485 // verify spline sanity
2486 gTools().CheckSplines( eff_s, fSplRefS );
2487 gTools().CheckSplines( eff_b, fSplRefB );
2488 }
2489
2490 // make the background-vs-signal efficiency plot
2491
2492 // create root finder
2493 RootFinder rootFinder( this, fXmin, fXmax );
2494
2495 Double_t effB = 0;
2496 fEffS = eff_s; // to be set for the root finder
2497 for (Int_t bini=1; bini<=fNbins; bini++) {
2498
2499 // find cut value corresponding to a given signal efficiency
2500 Double_t effS = eff_BvsS->GetBinCenter( bini );
2501 Double_t cut = rootFinder.Root( effS );
2502
2503 // retrieve background efficiency for given cut
2504 if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2505 else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2506
2507 // and fill histograms
2508 eff_BvsS->SetBinContent( bini, effB );
2509 rej_BvsS->SetBinContent( bini, 1.0-effB );
2510 if (effB>std::numeric_limits<double>::epsilon())
2511 inveff_BvsS->SetBinContent( bini, 1.0/effB );
2512 }
2513
2514 // create splines for histogram
2515 fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2516
2517 // search for overlap point where, when cutting on it,
2518 // one would obtain: eff_S = rej_B = 1 - eff_B
2519 Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2520 Int_t nbins_ = 5000;
2521 for (Int_t bini=1; bini<=nbins_; bini++) {
2522
2523 // get corresponding signal and background efficiencies
2524 effS = (bini - 0.5)/Float_t(nbins_);
2525 rejB = 1.0 - fSpleffBvsS->Eval( effS );
2526
2527 // find signal efficiency that corresponds to required background efficiency
2528 if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2529 effS_ = effS;
2530 rejB_ = rejB;
2531 }
2532
2533 // find cut that corresponds to signal efficiency and update signal-like criterion
2534 Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2535 SetSignalReferenceCut( cut );
2536 fEffS = 0;
2537 }
2538
2539 // must exist...
2540 if (0 == fSpleffBvsS) {
2541 delete list;
2542 return 0.0;
2543 }
2544
2545 // now find signal efficiency that corresponds to required background efficiency
2546 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2547 Int_t nbins_ = 1000;
2548
2549 if (computeArea) {
2550
2551 // compute area of rej-vs-eff plot
2552 Double_t integral = 0;
2553 for (Int_t bini=1; bini<=nbins_; bini++) {
2554
2555 // get corresponding signal and background efficiencies
2556 effS = (bini - 0.5)/Float_t(nbins_);
2557 effB = fSpleffBvsS->Eval( effS );
2558 integral += (1.0 - effB);
2559 }
2560 integral /= nbins_;
2561
2562 delete list;
2563 return integral;
2564 }
2565 else {
2566
      // this will be the value of the efficiency returned (it does not affect
      // the efficiency-vs-bkg plot, which is done anyway).
2569 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2570
2571 // find precise efficiency value
2572 for (Int_t bini=1; bini<=nbins_; bini++) {
2573
2574 // get corresponding signal and background efficiencies
2575 effS = (bini - 0.5)/Float_t(nbins_);
2576 effB = fSpleffBvsS->Eval( effS );
2577
2578 // find signal efficiency that corresponds to required background efficiency
2579 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2580 effS_ = effS;
2581 effB_ = effB;
2582 }
2583
2584 // take mean between bin above and bin below
2585 effS = 0.5*(effS + effS_);
2586
2587 effSerr = 0;
2588 if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2589
2590 delete list;
2591 return effS;
2592 }
2593
2594 return -1;
2595}
2596
2597////////////////////////////////////////////////////////////////////////////////
2598
{
   // Fill background-efficiency (and rejection) vs signal-efficiency plots for the
   // TRAINING sample and return the training signal efficiency at the background
   // efficiency requested in theString (format "Efficiency:<effB>").
   Data()->SetCurrentType(Types::kTraining);

   Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, Types::kNoAnalysisType);

   // fill background efficiency (resp. rejection) versus signal efficiency plots
   // returns signal efficiency at background efficiency indicated in theString

   // parse input string for required background efficiency
   // sanity check

   if (list->GetSize() != 2) {
      Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
            << " in string: " << theString
            << " | required format, e.g., Efficiency:0.05" << Endl;
      delete list;
      return -1;
   }
   // this will be the value of the efficiency returned (it does not affect
   // the efficiency-vs-bkg plot, which is done anyway.
   Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );

   delete list;

   // sanity check: signal and background histograms must share the same binning
   if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
       results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
      Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
            << Endl;
      return -1.0;
   }

   // create histogram

   // first, get efficiency histograms for signal and background
   TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
   Double_t xmin = effhist->GetXaxis()->GetXmin();
   Double_t xmax = effhist->GetXaxis()->GetXmax();

   // first round ? --> create and fill histograms
   if (results->DoesExist("MVA_TRAIN_S")==0) {

      // classifier response distributions for test sample
      Double_t sxmax = fXmax+0.00001;

      // MVA plots on the training sample (check for overtraining)
      TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
      TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
      results->Store(mva_s_tr, "MVA_TRAIN_S");
      results->Store(mva_b_tr, "MVA_TRAIN_B");
      mva_s_tr->Sumw2();
      mva_b_tr->Sumw2();

      // Training efficiency plots
      TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
                                    fNbinsH, xmin, xmax );
      TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
                                    fNbinsH, xmin, xmax );
      results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
      results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");

      // sign of cut orientation: +1 if signal lies above the cut value
      Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;

      std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
      assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());

      // this method is unbinned
      for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {

         Data()->SetCurrentEvent(ievt);
         const Event* ev = GetEvent();

         Double_t theVal = mvaValues[ievt];
         Double_t theWeight = ev->GetWeight();

         TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
         TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;

         theClsHist->Fill( theVal, theWeight );

         // cumulative efficiency: add the event weight to every cut bin it passes
         TAxis* axis = theEffHist->GetXaxis();
         Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
         if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
         if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
         if (sign > 0 && maxbin < 1 ) maxbin = 1;
         if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;

         if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
         else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
      }

      // normalise output distributions
      // uncomment those (and several others) if you want unnormalized output

      // renormalise to maximum
      mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
      mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );

      // Training background efficiency versus signal efficiency
      TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      // Training background rejection (=1-eff.) versus signal efficiency
      TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      results->Store(eff_bvss, "EFF_BVSS_TR");
      results->Store(rej_bvss, "REJ_BVSS_TR");

      // use root finder
      // spline background efficiency plot
      // note that there is a bin shift when going from a TH1D object to a TGraph :-(
      if (fSplTrainRefS) delete fSplTrainRefS;
      if (fSplTrainRefB) delete fSplTrainRefB;
      fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
      fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );

      // verify spline sanity
      gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
      gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
      }

      // make the background-vs-signal efficiency plot

      // create root finder
      RootFinder rootFinder(this, fXmin, fXmax );

      Double_t effB = 0;
      fEffS = results->GetHist("MVA_TRAINEFF_S");
      for (Int_t bini=1; bini<=fNbins; bini++) {

         // find cut value corresponding to a given signal efficiency
         Double_t effS = eff_bvss->GetBinCenter( bini );

         Double_t cut = rootFinder.Root( effS );

         // retrieve background efficiency for given cut
         if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
         else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );

         // and fill histograms
         eff_bvss->SetBinContent( bini, effB );
         rej_bvss->SetBinContent( bini, 1.0-effB );
      }
      fEffS = 0;

      // create splines for histogram
      fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
   // NOTE(review): this listing appears to have lost some lines around here — the
   // brace pairing in this region does not balance in the visible extract.
   }

   // must exist...
   if (0 == fSplTrainEffBvsS) return 0.0;

   // now find signal efficiency that corresponds to required background efficiency
   Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
   Int_t nbins_ = 1000;
   for (Int_t bini=1; bini<=nbins_; bini++) {

      // get corresponding signal and background efficiencies
      effS = (bini - 0.5)/Float_t(nbins_);
      effB = fSplTrainEffBvsS->Eval( effS );

      // stop once the spline crosses the requested background efficiency
      if ((effB - effBref)*(effB_ - effBref) <= 0) break;
      effS_ = effS;
      effB_ = effB;
   }

   return 0.5*(effS + effS_); // the mean between bin above and bin below
}
2771
2772////////////////////////////////////////////////////////////////////////////////
2773
2774std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2775{
2776 Data()->SetCurrentType(Types::kTesting);
2777 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
2778 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2779
2780 purity.push_back(resMulticlass->GetAchievablePur());
2781 return resMulticlass->GetAchievableEff();
2782}
2783
2784////////////////////////////////////////////////////////////////////////////////
2785
2786std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2787{
2788 Data()->SetCurrentType(Types::kTraining);
2789 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMulticlass));
2790 if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2791
2792 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2793 for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2794 resMulticlass->GetBestMultiClassCuts(icls);
2795 }
2796
2797 purity.push_back(resMulticlass->GetAchievablePur());
2798 return resMulticlass->GetAchievableEff();
2799}
2800
2801////////////////////////////////////////////////////////////////////////////////
2802/// Construct a confusion matrix for a multiclass classifier. The confusion
/// matrix compares, in turn, each class against all other classes in a pair-wise
2804/// fashion. In rows with index \f$ k_r = 0 ... K \f$, \f$ k_r \f$ is
2805/// considered signal for the sake of comparison and for each column
2806/// \f$ k_c = 0 ... K \f$ the corresponding class is considered background.
2807///
2808/// Note that the diagonal elements will be returned as NaN since this will
2809/// compare a class against itself.
2810///
2811/// \see TMVA::ResultsMulticlass::GetConfusionMatrix
2812///
2813/// \param[in] effB The background efficiency for which to evaluate.
2814/// \param[in] type The data set on which to evaluate (training, testing ...).
2815///
2816/// \return A matrix containing signal efficiencies for the given background
2817/// efficiency. The diagonal elements are NaN since this measure is
2818/// meaningless (comparing a class against itself).
2819///
2820
{
   // a confusion matrix is only meaningful for multiclass analyses
   if (GetAnalysisType() != Types::kMulticlass) {
      Log() << kFATAL << "Cannot get confusion matrix for non-multiclass analysis." << std::endl;
      return TMatrixD(0, 0);
   }

   Data()->SetCurrentType(type);
   // NOTE(review): the left-hand side of this assignment ("ResultsMulticlass
   // *resMulticlass =") is not visible in this extract; the cast result is
   // evidently stored in resMulticlass, which is checked and used below.
   dynamic_cast<ResultsMulticlass *>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));

   if (resMulticlass == nullptr) {
      Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
            << "unable to create pointer in GetMulticlassEfficiency, exiting." << Endl;
      return TMatrixD(0, 0);
   }

   return resMulticlass->GetConfusionMatrix(effB);
}
2840
2841////////////////////////////////////////////////////////////////////////////////
2842/// compute significance of mean difference
/// \f[
/// significance = \frac{|<S> - <B>|}{\sqrt{RMS_S^2 + RMS_B^2}}
/// \f]
2846
2848{
2849 Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2850
2851 return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2852}
2853
2854////////////////////////////////////////////////////////////////////////////////
2855/// compute "separation" defined as
2856/// \f[
2857/// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2858/// \f]
2859
2864
2865////////////////////////////////////////////////////////////////////////////////
2866/// compute "separation" defined as
2867/// \f[
2868/// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2869/// \f]
2870
2872{
2873 // note, if zero pointers given, use internal pdf
2874 // sanity check first
2875 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2876 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2877 if (!pdfS) pdfS = fSplS;
2878 if (!pdfB) pdfB = fSplB;
2879
2880 if (!fSplS || !fSplB) {
2881 Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2882 << " fSplS or fSplB are not yet filled" << Endl;
2883 return 0;
2884 }else{
2885 return gTools().GetSeparation( *pdfS, *pdfB );
2886 }
2887}
2888
2889////////////////////////////////////////////////////////////////////////////////
2890/// calculate the area (integral) under the ROC curve as a
2891/// overall quality measure of the classification
2892
2894{
2895 // note, if zero pointers given, use internal pdf
2896 // sanity check first
2897 if ((!histS && histB) || (histS && !histB))
2898 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2899
2900 if (histS==0 || histB==0) return 0.;
2901
2902 TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2903 TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2904
2905
2906 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2907 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2908
2909 Double_t integral = 0;
2910 UInt_t nsteps = 1000;
2911 Double_t step = (xmax-xmin)/Double_t(nsteps);
2912 Double_t cut = xmin;
2913 for (UInt_t i=0; i<nsteps; i++) {
2914 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2915 cut+=step;
2916 }
2917 delete pdfS;
2918 delete pdfB;
2919 return integral*step;
2920}
2921
2922
2923////////////////////////////////////////////////////////////////////////////////
2924/// calculate the area (integral) under the ROC curve as a
2925/// overall quality measure of the classification
2926
2928{
2929 // note, if zero pointers given, use internal pdf
2930 // sanity check first
2931 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2932 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2933 if (!pdfS) pdfS = fSplS;
2934 if (!pdfB) pdfB = fSplB;
2935
2936 if (pdfS==0 || pdfB==0) return 0.;
2937
2938 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2939 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2940
2941 Double_t integral = 0;
2942 UInt_t nsteps = 1000;
2943 Double_t step = (xmax-xmin)/Double_t(nsteps);
2944 Double_t cut = xmin;
2945 for (UInt_t i=0; i<nsteps; i++) {
2946 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2947 cut+=step;
2948 }
2949 return integral*step;
2950}
2951
2952////////////////////////////////////////////////////////////////////////////////
2953/// plot significance, \f$ \frac{S}{\sqrt{S^2 + B^2}} \f$, curve for given number
2954/// of signal and background events; returns cut for maximum significance
2955/// also returned via reference is the maximum significance
2956
{
   // Scan the cut value, fill the significance per cut bin into a scratch
   // histogram, and return the cut value at the maximum; the maximum value is
   // also returned via the max_significance_value reference.
   Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, Types::kMaxAnalysisType );

   Double_t effS(0),effB(0),significance(0);
   // scratch histogram: significance as a function of the cut value
   TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );

   // sanity check: both event counts must be positive
   if (SignalEvents <= 0 || BackgroundEvents <= 0) {
      Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
            << "Number of signal or background events is <= 0 ==> abort"
            << Endl;
   }

   // NOTE(review): the continuation of this log statement is not visible in this extract
   Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "

   // efficiency-vs-cut histograms filled during testing
   TH1* eff_s = results->GetHist("MVA_EFF_S");
   TH1* eff_b = results->GetHist("MVA_EFF_B");

   if ( (eff_s==0) || (eff_b==0) ) {
      Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
      Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
      return 0;
   }

   // scan all cut bins and record the significance in each
   for (Int_t bin=1; bin<=fNbinsH; bin++) {
      effS = eff_s->GetBinContent( bin );
      effB = eff_b->GetBinContent( bin );

      // put significance into a histogram
      // NOTE(review): the line computing 'significance' from effS/effB and the
      // event counts is not visible in this extract — verify against the repository.

      temp_histogram->SetBinContent(bin,significance);
   }

   // find maximum in histogram
   max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
   max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );

   // delete the scratch histogram
   delete temp_histogram;

   Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
   Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;

   return max_significance;
}
3007
3008////////////////////////////////////////////////////////////////////////////////
3009/// calculates rms,mean, xmin, xmax of the event variable
3010/// this can be either done for the variables as they are or for
3011/// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
3012
{
   // Compute weighted mean, second moment, and the global min/max of one event
   // variable, separately for signal and background, on the requested tree type;
   // results are written to the reference output arguments.
   // remember the current tree type, we temporarily switch to treeType below
   Types::ETreeType previousTreeType = Data()->GetCurrentType();
   Data()->SetCurrentType(treeType);

   Long64_t entries = Data()->GetNEvents();

   // sanity check: the selected tree must contain events
   if (entries <=0)
      Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;

   // index of the wanted variable
   UInt_t varIndex = DataInfo().FindVarIndex( theVarName );

   // first fill signal and background in arrays before analysis
   xmin = +DBL_MAX;
   xmax = -DBL_MAX;

   // take into account event weights
   meanS = 0;
   meanB = 0;
   rmsS = 0;
   rmsB = 0;
   Double_t sumwS = 0, sumwB = 0;

   // loop over all training events
   for (Int_t ievt = 0; ievt < entries; ievt++) {

      const Event* ev = GetEvent(ievt);

      Double_t theVar = ev->GetValue(varIndex);
      Double_t weight = ev->GetWeight();

      // accumulate weighted first and second moments, split by class
      if (DataInfo().IsSignal(ev)) {
         sumwS += weight;
         meanS += weight*theVar;
         rmsS += weight*theVar*theVar;
      }
      else {
         sumwB += weight;
         meanB += weight*theVar;
         rmsB += weight*theVar*theVar;
      }
      xmin = TMath::Min( xmin, theVar );
      xmax = TMath::Max( xmax, theVar );
   }

   // normalise the weighted sums to obtain the means
   meanS = meanS/sumwS;
   meanB = meanB/sumwB;
   // NOTE(review): the finalisation of rmsS/rmsB (presumably sqrt of the weighted
   // variance) is not visible in this extract — verify against the repository.

   Data()->SetCurrentType(previousTreeType);
}
3070
3071////////////////////////////////////////////////////////////////////////////////
3072/// create reader class for method (classification only at present)
3073
{
   // Write a standalone C++ reader class ("Read<MethodName>") for this method
   // into classFileName (default: <weightdir>/<job>_<method>.class.C).
   // NOTE(review): the declaration of classFileName and the body of the else
   // branch are not visible in this extract — verify against the repository.
   // the default consists of
   if (theClassFileName == "")
      classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
   else

   TString className = TString("Read") + GetMethodName();

   Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
      << "Creating standalone class: "
      << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;

   std::ofstream fout( classFileName );
   if (!fout.good()) { // file could not be opened --> Error
      Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
   }

   // now create the class
   // preamble
   fout << "// Class: " << className << std::endl;
   fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;

   // print general information and configuration state
   fout << std::endl;
   fout << "/* configuration options =====================================================" << std::endl << std::endl;
   WriteStateToStream( fout );
   fout << std::endl;
   fout << "============================================================================ */" << std::endl;

   // generate the class
   fout << "" << std::endl;
   fout << "#include <array>" << std::endl;
   fout << "#include <vector>" << std::endl;
   fout << "#include <cmath>" << std::endl;
   fout << "#include <string>" << std::endl;
   fout << "#include <iostream>" << std::endl;
   fout << "" << std::endl;
   // now if the classifier needs to write some additional classes for its response implementation
   // this code goes here: (at least the header declarations need to come before the main class
   this->MakeClassSpecificHeader( fout, className );

   // abstract interface the generated class derives from
   fout << "#ifndef IClassifierReader__def" << std::endl;
   fout << "#define IClassifierReader__def" << std::endl;
   fout << std::endl;
   fout << "class IClassifierReader {" << std::endl;
   fout << std::endl;
   fout << " public:" << std::endl;
   fout << std::endl;
   fout << " // constructor" << std::endl;
   fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
   fout << " virtual ~IClassifierReader() {}" << std::endl;
   fout << std::endl;
   fout << " // return classifier response" << std::endl;
   if(GetAnalysisType() == Types::kMulticlass) {
      fout << " virtual std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const = 0;" << std::endl;
   } else {
      fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
   }
   fout << std::endl;
   fout << " // returns classifier status" << std::endl;
   fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
   fout << std::endl;
   fout << " protected:" << std::endl;
   fout << std::endl;
   fout << " bool fStatusIsClean;" << std::endl;
   fout << "};" << std::endl;
   fout << std::endl;
   fout << "#endif" << std::endl;
   fout << std::endl;
   // the concrete reader class: constructor validates the input variable list
   fout << "class " << className << " : public IClassifierReader {" << std::endl;
   fout << std::endl;
   fout << " public:" << std::endl;
   fout << std::endl;
   fout << " // constructor" << std::endl;
   fout << " " << className << "( std::vector<std::string>& theInputVars )" << std::endl;
   fout << " : IClassifierReader()," << std::endl;
   fout << " fClassName( \"" << className << "\" )," << std::endl;
   fout << " fNvars( " << GetNvar() << " )" << std::endl;
   fout << " {" << std::endl;
   fout << " // the training input variables" << std::endl;
   fout << " const char* inputVars[] = { ";
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      fout << "\"" << GetOriginalVarName(ivar) << "\"";
      if (ivar<GetNvar()-1) fout << ", ";
   }
   fout << " };" << std::endl;
   fout << std::endl;
   fout << " // sanity checks" << std::endl;
   fout << " if (theInputVars.size() <= 0) {" << std::endl;
   fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
   fout << " fStatusIsClean = false;" << std::endl;
   fout << " }" << std::endl;
   fout << std::endl;
   fout << " if (theInputVars.size() != fNvars) {" << std::endl;
   fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
   fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
   fout << " fStatusIsClean = false;" << std::endl;
   fout << " }" << std::endl;
   fout << std::endl;
   fout << " // validate input variables" << std::endl;
   fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
   fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
   fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
   fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
   fout << " fStatusIsClean = false;" << std::endl;
   fout << " }" << std::endl;
   fout << " }" << std::endl;
   fout << std::endl;
   fout << " // initialize min and max vectors (for normalisation)" << std::endl;
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
      fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
      fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
   }
   fout << std::endl;
   fout << " // initialize input variable types" << std::endl;
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
   }
   fout << std::endl;
   fout << " // initialize constants" << std::endl;
   fout << " Initialize();" << std::endl;
   fout << std::endl;
   if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
      fout << " // initialize transformation" << std::endl;
      fout << " InitTransform();" << std::endl;
   }
   fout << " }" << std::endl;
   fout << std::endl;
   fout << " // destructor" << std::endl;
   fout << " virtual ~" << className << "() {" << std::endl;
   fout << " Clear(); // method-specific" << std::endl;
   fout << " }" << std::endl;
   fout << std::endl;
   fout << " // the classifier response" << std::endl;
   fout << " // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
   fout << " // variables given to the constructor" << std::endl;
   if(GetAnalysisType() == Types::kMulticlass) {
      fout << " std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const override;" << std::endl;
   } else {
      fout << " double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
   }
   fout << std::endl;
   fout << " private:" << std::endl;
   fout << std::endl;
   fout << " // method-specific destructor" << std::endl;
   fout << " void Clear();" << std::endl;
   fout << std::endl;
   if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
      fout << " // input variable transformation" << std::endl;
      GetTransformationHandler().MakeFunction(fout, className,1);
      fout << " void InitTransform();" << std::endl;
      fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
      fout << std::endl;
   }
   fout << " // common member variables" << std::endl;
   fout << " const char* fClassName;" << std::endl;
   fout << std::endl;
   fout << " const size_t fNvars;" << std::endl;
   fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
   fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
   fout << std::endl;
   fout << " // normalisation of input variables" << std::endl;
   fout << " double fVmin[" << GetNvar() << "];" << std::endl;
   fout << " double fVmax[" << GetNvar() << "];" << std::endl;
   fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
   fout << " // normalise to output range: [-1, 1]" << std::endl;
   fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
   fout << " }" << std::endl;
   fout << std::endl;
   fout << " // type of input variable: 'F' or 'I'" << std::endl;
   fout << " char fType[" << GetNvar() << "];" << std::endl;
   fout << std::endl;
   fout << " // initialize internal variables" << std::endl;
   fout << " void Initialize();" << std::endl;
   if(GetAnalysisType() == Types::kMulticlass) {
      fout << " std::vector<double> GetMulticlassValues__( const std::vector<double>& inputValues ) const;" << std::endl;
   } else {
      fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
   }
   fout << "" << std::endl;
   fout << " // private members (method specific)" << std::endl;

   // call the classifier specific output (the classifier must close the class !)
   MakeClassSpecific( fout, className );

   // public entry point: validates status, normalises/transforms, then delegates
   if(GetAnalysisType() == Types::kMulticlass) {
      fout << "inline std::vector<double> " << className << "::GetMulticlassValues( const std::vector<double>& inputValues ) const" << std::endl;
   } else {
      fout << "inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
   }
   fout << "{" << std::endl;
   fout << " // classifier response value" << std::endl;
   if(GetAnalysisType() == Types::kMulticlass) {
      fout << " std::vector<double> retval;" << std::endl;
   } else {
      fout << " double retval = 0;" << std::endl;
   }
   fout << std::endl;
   fout << " // classifier response, sanity check first" << std::endl;
   fout << " if (!IsStatusClean()) {" << std::endl;
   fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
   fout << " << \" because status is dirty\" << std::endl;" << std::endl;
   fout << " }" << std::endl;
   fout << " else {" << std::endl;
   if (IsNormalised()) {
      fout << " // normalise variables" << std::endl;
      fout << " std::vector<double> iV;" << std::endl;
      fout << " iV.reserve(inputValues.size());" << std::endl;
      fout << " int ivar = 0;" << std::endl;
      fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
      fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
      fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
      fout << " }" << std::endl;
      if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
          GetMethodType() != Types::kHMatrix) {
         fout << " Transform( iV, -1 );" << std::endl;
      }

      if(GetAnalysisType() == Types::kMulticlass) {
         fout << " retval = GetMulticlassValues__( iV );" << std::endl;
      } else {
         fout << " retval = GetMvaValue__( iV );" << std::endl;
      }
   } else {
      if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
          GetMethodType() != Types::kHMatrix) {
         fout << " std::vector<double> iV(inputValues);" << std::endl;
         fout << " Transform( iV, -1 );" << std::endl;
         if(GetAnalysisType() == Types::kMulticlass) {
            fout << " retval = GetMulticlassValues__( iV );" << std::endl;
         } else {
            fout << " retval = GetMvaValue__( iV );" << std::endl;
         }
      } else {
         if(GetAnalysisType() == Types::kMulticlass) {
            fout << " retval = GetMulticlassValues__( inputValues );" << std::endl;
         } else {
            fout << " retval = GetMvaValue__( inputValues );" << std::endl;
         }
      }
   }
   fout << " }" << std::endl;
   fout << std::endl;
   fout << " return retval;" << std::endl;
   fout << "}" << std::endl;

   // create output for transformation - if any
   if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
      GetTransformationHandler().MakeFunction(fout, className,2);

   // close the file
   fout.close();
}
3331
3332////////////////////////////////////////////////////////////////////////////////
3333/// prints out method-specific help method
3334
3336{
3337 // if options are written to reference file, also append help info
3338 std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3339 std::ofstream* o = 0;
3340 if (gConfig().WriteOptionsReference()) {
3341 Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3342 o = new std::ofstream( GetReferenceFile(), std::ios::app );
3343 if (!o->good()) { // file could not be opened --> Error
3344 Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3345 }
3346 std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3347 }
3348
3349 // "|--------------------------------------------------------------|"
3350 if (!o) {
3351 Log() << kINFO << Endl;
3352 Log() << gTools().Color("bold")
3353 << "================================================================"
3354 << gTools().Color( "reset" )
3355 << Endl;
3356 Log() << gTools().Color("bold")
3357 << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3358 << gTools().Color( "reset" )
3359 << Endl;
3360 }
3361 else {
3362 Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3363 }
3364
3365 // print method-specific help message
3366 GetHelpMessage();
3367
3368 if (!o) {
3369 Log() << Endl;
3370 Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3371 Log() << gTools().Color("bold")
3372 << "================================================================"
3373 << gTools().Color( "reset" )
3374 << Endl;
3375 Log() << Endl;
3376 }
3377 else {
3378 // indicate END
3379 Log() << "# End of Message___" << Endl;
3380 }
3381
3382 std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3383 if (o) o->close();
3384}
3385
3386// ----------------------- r o o t f i n d i n g ----------------------------
3387
3388////////////////////////////////////////////////////////////////////////////////
3389/// returns efficiency as function of cut
3390
3392{
3393 Double_t retval=0;
3394
3395 // retrieve the class object
3397 retval = fSplRefS->Eval( theCut );
3398 }
3399 else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3400
3401 // caution: here we take some "forbidden" action to hide a problem:
3402 // in some cases, in particular for likelihood, the binned efficiency distributions
3403 // do not equal 1, at xmin, and 0 at xmax; of course, in principle we have the
3404 // unbinned information available in the trees, but the unbinned minimization is
3405 // too slow, and we don't need to do a precision measurement here. Hence, we force
3406 // this property.
3407 Double_t eps = 1.0e-5;
3408 if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3409 else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3410
3411 return retval;
3412}
3413
3414////////////////////////////////////////////////////////////////////////////////
3415/// returns the event collection (i.e. the dataset) TRANSFORMED using the
3416/// classifiers specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3417
3419{
3420 // if there's no variable transformation for this classifier, just hand back the
3421 // event collection of the data set
3422 if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3423 return (Data()->GetEventCollection(type));
3424 }
3425
3426 // otherwise, transform ALL the events and hand back the vector of the pointers to the
3427 // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3428 // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3429 Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3430 if (fEventCollections.at(idx) == 0) {
3431 fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3432 fEventCollections.at(idx) = GetTransformationHandler().CalcTransformations(*(fEventCollections.at(idx)),kTRUE);
3433 }
3434 return *(fEventCollections.at(idx));
3435}
3436
3437////////////////////////////////////////////////////////////////////////////////
3438/// calculates the TMVA version string from the training version code on the fly
3439
3441{
3442 UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3443 UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3444 UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3445
3446 return TString::Format("%i.%i.%i",a,b,c);
3447}
3448
3449////////////////////////////////////////////////////////////////////////////////
3450/// calculates the ROOT version string from the training version code on the fly
3451
3453{
3454 UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3455 UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3456 UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3457
3458 return TString::Format("%i.%02i/%02i",a,b,c);
3459}
3460
3461////////////////////////////////////////////////////////////////////////////////
3462
3465 ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
3466
3467 if (mvaRes != NULL) {
3468 TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3469 TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3470 TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3471 TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3472
3473 if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3474
3475 if (SorB == 's' || SorB == 'S')
3476 return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3477 else
3478 return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3479 }
3480 return -1;
3481}
const Bool_t Use_Splines_for_Eff_
const Int_t NBIN_HIST_HIGH
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
#define s1(x)
Definition RSha256.hxx:91
#define ROOT_VERSION_CODE
Definition RVersion.hxx:24
bool Bool_t
Boolean (0=false, 1=true) (bool)
Definition RtypesCore.h:77
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
char Char_t
Character 1 byte (char)
Definition RtypesCore.h:51
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char y2
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char y1
char name[80]
Definition TGX11.cxx:157
float xmin
float xmax
TMatrixT< Double_t > TMatrixD
Definition TMatrixDfwd.h:23
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2495
R__EXTERN TSystem * gSystem
Definition TSystem.h:582
#define TMVA_VERSION_CODE
Definition Version.h:47
const_iterator begin() const
const_iterator end() const
Class to manage histogram axis.
Definition TAxis.h:32
Double_t GetXmax() const
Definition TAxis.h:142
Double_t GetXmin() const
Definition TAxis.h:141
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130...
Definition TDatime.h:37
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:130
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3765
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:926
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:878
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition TH1.cxx:7627
virtual Int_t GetQuantiles(Int_t n, Double_t *xp, const Double_t *p=nullptr)
Compute Quantiles for this histogram.
Definition TH1.cxx:4649
2-D histogram with a float per channel (see TH1 documentation)
Definition TH2.h:345
Int_t Fill(Double_t) override
Invalid Fill method.
Definition TH2.cxx:363
A doubly linked list.
Definition TList.h:38
Class that contains all the information of a class.
Definition ClassInfo.h:49
TString fWeightFileExtension
Definition Config.h:125
VariablePlotting & GetVariablePlotting()
Definition Config.h:97
class TMVA::Config::VariablePlotting fVariablePlotting
IONames & GetIONames()
Definition Config.h:98
MsgLogger * fLogger
! message logger
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
Definition DataSet.h:58
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:399
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:408
Interface for all concrete MVA method implementations.
Definition IMethod.h:53
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
IPythonInteractive()
standard constructor
~IPythonInteractive()
standard destructor
void ClearGraphs()
This function sets the point number to 0 for all graphs.
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Virtual base Class for all MVA method.
Definition MethodBase.h:111
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
void PrintHelpMessage() const override
prints out method-specific help method
virtual std::vector< Float_t > GetAllMulticlassValues()
Get all multi-class values.
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
const char * GetName() const override
Definition MethodBase.h:337
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
void SetWeightFileDir(TString fileDir)
set directory of weight file
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables,...
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual Double_t GetSignificance() const
compute significance of mean difference
virtual Double_t GetProba(const Event *ev)
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual std::vector< Double_t > GetDataMvaValues(DataSet *data=nullptr, Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the given Data type
void SetupMethod()
setup of methods
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
virtual ~MethodBase()
destructor
void WriteMonitoringHistosToFile() const override
write special monitoring histograms to file dummy implementation here --------------—
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance, , curve for given number of signal and background events; returns cut for maximum ...
virtual Double_t GetTrainingEfficiency(const TString &)
void SetWeightFileName(TString)
set the weight file name (depreciated)
TString GetWeightFileName() const
retrieve weight file name
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void InitBase()
default initialization called by all constructors
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
void MakeClass(const TString &classFileName=TString("")) const override
create reader class for method (classification only at present)
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
void ReadStateFromFile()
Function to write options and weights to file.
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream.
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
void SetTestvarName(const TString &v="")
Definition MethodBase.h:344
void ReadVariablesFromXML(void *varnode)
read variable info from XML
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables,...
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity:
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument This is just a dummy .
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
void AddVarsXMLTo(void *parent) const
write variable info to XML
Double_t GetMvaValue(Double_t *errLower=nullptr, Double_t *errUpper=nullptr) override=0
void AddTargetsXMLTo(void *parent) const
write target info to XML
void ReadTargetsFromXML(void *tarnode)
read target info from XML
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
void ReadStateFromXML(void *parent)
virtual std::vector< Float_t > GetAllRegressionValues()
Get al regression values in one call.
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
void SetSource(const std::string &source)
Definition MsgLogger.h:68
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
@ kSpline3
Definition PDF.h:70
@ kSpline2
Definition PDF.h:70
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Definition Results.h:57
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition RootFinder.h:48
Linear interpolation of TGraph.
Definition TSpline1.h:43
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition Tools.cxx:203
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition Tools.cxx:376
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as
Definition Tools.cxx:122
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition Tools.cxx:564
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:803
TXMLEngine & xmlengine()
Definition Tools.h:262
Bool_t CheckSplines(const TH1 *, const TSpline *)
check quality of splining by comparing splines and histograms in each bin
Definition Tools.cxx:454
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1125
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition Tools.cxx:358
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1099
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1137
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Definition Types.h:135
@ kBackground
Definition Types.h:136
@ kLikelihood
Definition Types.h:79
@ kHMatrix
Definition Types.h:81
@ kMulticlass
Definition Types.h:129
@ kNoAnalysisType
Definition Types.h:130
@ kClassification
Definition Types.h:127
@ kMaxAnalysisType
Definition Types.h:131
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
Linear interpolation class.
Gaussian Transformation of input variables.
Class for type info of MVA input variable.
Linear interpolation class.
Linear interpolation class.
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition TMultiGraph.h:34
Collectable string class.
Definition TObjString.h:28
Basic string class.
Definition TString.h:138
void ToLower()
Change string to lower-case.
Definition TString.cxx:1189
Int_t Atoi() const
Return integer value of string.
Definition TString.cxx:1994
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition TString.cxx:1170
const char * Data() const
Definition TString.h:384
@ kLeading
Definition TString.h:284
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:660
virtual const char * GetBuildNode() const
Return the build node name.
Definition TSystem.cxx:3964
virtual int mkdir(const char *name, Bool_t recursive=kFALSE)
Make a file system directory.
Definition TSystem.cxx:916
virtual const char * WorkingDirectory()
Return working directory.
Definition TSystem.cxx:881
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition TSystem.cxx:1612
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file if layout<=0, no any spaces or newlines will be placed between xmlnode...
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
TLine * line
Double_t x[n]
Definition legend1.C:17
TH1F * h1
Definition legend1.C:5
Config & gConfig()
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:249
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:673
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:197
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:122
static void output()