Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodBase.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodBase *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21 * *
22 * Copyright (c) 2005-2011: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * U. of Bonn, Germany *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (see tmva/doc/LICENSE) *
31 * *
32 **********************************************************************************/
33
34/*! \class TMVA::MethodBase
35\ingroup TMVA
36
 37 Virtual base class for all MVA methods
38
39 MethodBase hosts several specific evaluation methods.
40
41 The kind of MVA that provides optimal performance in an analysis strongly
42 depends on the particular application. The evaluation factory provides a
43 number of numerical benchmark results to directly assess the performance
44 of the MVA training on the independent test sample. These are:
45
46 - The _signal efficiency_ at three representative background efficiencies
47 (which is 1 &minus; rejection).
48 - The _significance_ of an MVA estimator, defined by the difference
49 between the MVA mean values for signal and background, divided by the
50 quadratic sum of their root mean squares.
51 - The _separation_ of an MVA _x_, defined by the integral
52 \f[
53 \frac{1}{2} \int \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx
54 \f]
55 where
56 \f$ S(x) \f$ and \f$ B(x) \f$ are the signal and background distributions,
57 respectively. The separation is zero for identical signal and background MVA
58 shapes, and it is one for disjunctive shapes.
59 - The average, \f$ \int x \mu (S(x)) dx \f$, of the signal \f$ \mu_{transform} \f$.
60 The \f$ \mu_{transform} \f$ of an MVA denotes the transformation that yields
61 a uniform background distribution. In this way, the signal distributions
62 \f$ S(x) \f$ can be directly compared among the various MVAs. The stronger
63 \f$ S(x) \f$ peaks towards one, the better is the discrimination of the MVA.
64 The \f$ \mu_{transform} \f$ is
65 [documented here](http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html).
66
67 The MVA standard output also prints the linear correlation coefficients between
68 signal and background, which can be useful to eliminate variables that exhibit too
69 strong correlations.
70*/
71
72#include "TMVA/MethodBase.h"
73
74#include "TMVA/Config.h"
75#include "TMVA/Configurable.h"
76#include "TMVA/DataSetInfo.h"
77#include "TMVA/DataSet.h"
78#include "TMVA/Factory.h"
79#include "TMVA/IMethod.h"
80#include "TMVA/MsgLogger.h"
81#include "TMVA/PDF.h"
82#include "TMVA/Ranking.h"
83#include "TMVA/DataLoader.h"
84#include "TMVA/Tools.h"
85#include "TMVA/Results.h"
89#include "TMVA/RootFinder.h"
90#include "TMVA/Timer.h"
91#include "TMVA/TSpline1.h"
92#include "TMVA/Types.h"
96#include "TMVA/VariableInfo.h"
100#include "TMVA/Version.h"
101
102#include "TROOT.h"
103#include "TSystem.h"
104#include "TObjString.h"
105#include "TQObject.h"
106#include "TSpline.h"
107#include "TMatrix.h"
108#include "TMath.h"
109#include "TH1F.h"
110#include "TH2F.h"
111#include "TFile.h"
112#include "TGraph.h"
113#include "TXMLEngine.h"
114
115#include <iomanip>
116#include <iostream>
117#include <fstream>
118#include <sstream>
119#include <cstdlib>
120#include <algorithm>
121#include <limits>
122
123
124
125using std::endl;
126using std::atof;
127
128//const Int_t MethodBase_MaxIterations_ = 200;
 130
 131//const Int_t NBIN_HIST_PLOT = 100;
// Fine binning used for internal histograms (efficiency/ROC scans) where high
// resolution matters more than presentation; copied into fNbinsH in InitBase().
 132const Int_t NBIN_HIST_HIGH = 10000;
 133
 134#ifdef _WIN32
 135/* Disable warning C4355: 'this' : used in base member initializer list */
 136#pragma warning ( disable : 4355 )
 137#endif
138
139
140#include "TMultiGraph.h"
141
142////////////////////////////////////////////////////////////////////////////////
143/// standard constructor
144
// NOTE(review): the constructor signature line is missing from this extraction
// (Doxygen scrape); presumably TMVA::IPythonInteractive::IPythonInteractive() —
// confirm against upstream. Body kept verbatim.
146{
 // Start with no registered graphs; Init() creates them on demand.
 147 fNumGraphs = 0;
 // fIndex is the next point slot used by AddPoint().
 148 fIndex = 0;
 149}
150
151////////////////////////////////////////////////////////////////////////////////
152/// standard destructor
// NOTE(review): destructor signature line missing from this extraction; body verbatim.
// Owns only fMultiGraph; the TGraphs in fGraphs were Add()ed to it, and TMultiGraph
// deletes its members — presumably that is why fGraphs is not deleted here (confirm).
154{
 155 if (fMultiGraph){
 156 delete fMultiGraph;
 157 fMultiGraph = nullptr;
 158 }
 159 return;
 160}
161
162////////////////////////////////////////////////////////////////////////////////
163/// This function gets some title and it creates a TGraph for every title.
164/// It also sets up the style for every TGraph. All graphs are added to a single TMultiGraph.
165///
166/// \param[in] graphTitles vector of titles
167
// NOTE(review): signature line missing from this extraction (takes the
// graphTitles vector per the doc block above); body verbatim.
169{
 // Guard against double initialization — graphs may only be created once.
 170 if (fNumGraphs!=0){
 171 std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
 172 return;
 173 }
 // Start at color index 2 and step by 2 so consecutive graphs get distinct colors.
 174 Int_t color = 2;
 175 for(auto& title : graphTitles){
 176 fGraphs.push_back( new TGraph() );
 177 fGraphs.back()->SetTitle(title);
 178 fGraphs.back()->SetName(title);
 179 fGraphs.back()->SetFillColor(color);
 180 fGraphs.back()->SetLineColor(color);
 181 fGraphs.back()->SetMarkerColor(color);
 182 fMultiGraph->Add(fGraphs.back());
 183 color += 2;
 184 fNumGraphs += 1;
 185 }
 186 return;
 187}
188
189////////////////////////////////////////////////////////////////////////////////
190/// This function sets the point number to 0 for all graphs.
191
// NOTE(review): signature line missing from this extraction; body verbatim.
// Resets every graph to zero points (TGraph::Set(0)) without deleting them.
193{
 194 for(Int_t i=0; i<fNumGraphs; i++){
 195 fGraphs[i]->Set(0);
 196 }
 197}
198
199////////////////////////////////////////////////////////////////////////////////
200/// This function is used only in 2 TGraph case, and it will add new data points to graphs.
201///
202/// \param[in] x the x coordinate
203/// \param[in] y1 the y coordinate for the first TGraph
204/// \param[in] y2 the y coordinate for the second TGraph
205
// NOTE(review): signature line missing from this extraction (the (x, y1, y2)
// overload per the doc block above); body verbatim.
// Hard-codes graphs 0 and 1 — valid only for the two-graph case, as documented.
207{
 // Grow both graphs by one slot, then fill the new point.
 208 fGraphs[0]->Set(fIndex+1);
 209 fGraphs[1]->Set(fIndex+1);
 210 fGraphs[0]->SetPoint(fIndex, x, y1);
 211 fGraphs[1]->SetPoint(fIndex, x, y2);
 212 fIndex++;
 213 return;
 214}
215
216////////////////////////////////////////////////////////////////////////////////
217/// This function can add data points to as many TGraphs as we have.
218///
219/// \param[in] dat vector of data points. The dat[0] contains the x coordinate,
220/// dat[1] contains the y coordinate for first TGraph, dat[2] for second, ...
221
222void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
223{
224 for(Int_t i=0; i<fNumGraphs;i++){
225 fGraphs[i]->Set(fIndex+1);
226 fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
227 }
228 fIndex++;
229 return;
230}
231
232
233////////////////////////////////////////////////////////////////////////////////
234/// standard constructor
235
// NOTE(review): the first lines of this constructor (its name, the jobName /
// methodType / dsi parameters, and the Configurable(theOption) base call) are
// missing from this extraction — confirm against upstream. Remaining
// initializer list and body kept verbatim. All pointer members start null (0);
// real setup happens later in SetupMethod()/InitBase().
238 const TString& methodTitle,
240 const TString& theOption) :
241 IMethod(),
243 fTmpEvent ( 0 ),
244 fRanking ( 0 ),
245 fInputVars ( 0 ),
246 fAnalysisType ( Types::kNoAnalysisType ),
247 fRegressionReturnVal ( 0 ),
248 fMulticlassReturnVal ( 0 ),
249 fDataSetInfo ( dsi ),
250 fSignalReferenceCut ( 0.5 ),
251 fSignalReferenceCutOrientation( 1. ),
252 fVariableTransformType ( Types::kSignal ),
253 fJobName ( jobName ),
254 fMethodName ( methodTitle ),
255 fMethodType ( methodType ),
256 fTestvar ( "" ),
257 fTMVATrainingVersion ( TMVA_VERSION_CODE ),
258 fROOTTrainingVersion ( ROOT_VERSION_CODE ),
259 fConstructedFromWeightFile ( kFALSE ),
260 fBaseDir ( 0 ),
261 fMethodBaseDir ( 0 ),
262 fFile ( 0 ),
263 fSilentFile (kFALSE),
264 fModelPersistence (kTRUE),
265 fWeightFile ( "" ),
266 fEffS ( 0 ),
267 fDefaultPDF ( 0 ),
268 fMVAPdfS ( 0 ),
269 fMVAPdfB ( 0 ),
270 fSplS ( 0 ),
271 fSplB ( 0 ),
272 fSpleffBvsS ( 0 ),
273 fSplTrainS ( 0 ),
274 fSplTrainB ( 0 ),
275 fSplTrainEffBvsS ( 0 ),
276 fVarTransformString ( "None" ),
277 fTransformationPointer ( 0 ),
278 fTransformation ( dsi, methodTitle ),
279 fVerbose ( kFALSE ),
280 fVerbosityLevelString ( "Default" ),
281 fHelp ( kFALSE ),
282 fHasMVAPdfs ( kFALSE ),
283 fIgnoreNegWeightsInTraining( kFALSE ),
284 fSignalClass ( 0 ),
285 fBackgroundClass ( 0 ),
286 fSplRefS ( 0 ),
287 fSplRefB ( 0 ),
288 fSplTrainRefS ( 0 ),
289 fSplTrainRefB ( 0 ),
290 fSetupCompleted (kFALSE)
291{
294
295// // default extension for weight files
296}
297
298////////////////////////////////////////////////////////////////////////////////
299/// constructor used for Testing + Application of the MVA,
300/// only (no training), using given WeightFiles
301
// NOTE(review): the opening lines of this constructor (name, methodType / dsi
// parameters) are missing from this extraction — confirm against upstream.
// This overload is used for testing/application only (no training): versions
// are zeroed, fConstructedFromWeightFile is set, and the state is later read
// back from the given weight file. Initializer list and body kept verbatim.
304 const TString& weightFile ) :
305 IMethod(),
306 Configurable(""),
307 fTmpEvent ( 0 ),
308 fRanking ( 0 ),
309 fInputVars ( 0 ),
310 fAnalysisType ( Types::kNoAnalysisType ),
311 fRegressionReturnVal ( 0 ),
312 fMulticlassReturnVal ( 0 ),
313 fDataSetInfo ( dsi ),
314 fSignalReferenceCut ( 0.5 ),
315 fVariableTransformType ( Types::kSignal ),
316 fJobName ( "" ),
317 fMethodName ( "MethodBase" ),
318 fMethodType ( methodType ),
319 fTestvar ( "" ),
320 fTMVATrainingVersion ( 0 ),
321 fROOTTrainingVersion ( 0 ),
322 fConstructedFromWeightFile ( kTRUE ),
323 fBaseDir ( 0 ),
324 fMethodBaseDir ( 0 ),
325 fFile ( 0 ),
326 fSilentFile (kFALSE),
327 fModelPersistence (kTRUE),
328 fWeightFile ( weightFile ),
329 fEffS ( 0 ),
330 fDefaultPDF ( 0 ),
331 fMVAPdfS ( 0 ),
332 fMVAPdfB ( 0 ),
333 fSplS ( 0 ),
334 fSplB ( 0 ),
335 fSpleffBvsS ( 0 ),
336 fSplTrainS ( 0 ),
337 fSplTrainB ( 0 ),
338 fSplTrainEffBvsS ( 0 ),
339 fVarTransformString ( "None" ),
340 fTransformationPointer ( 0 ),
341 fTransformation ( dsi, "" ),
342 fVerbose ( kFALSE ),
343 fVerbosityLevelString ( "Default" ),
344 fHelp ( kFALSE ),
345 fHasMVAPdfs ( kFALSE ),
346 fIgnoreNegWeightsInTraining( kFALSE ),
347 fSignalClass ( 0 ),
348 fBackgroundClass ( 0 ),
349 fSplRefS ( 0 ),
350 fSplRefB ( 0 ),
351 fSplTrainRefS ( 0 ),
352 fSplTrainRefB ( 0 ),
353 fSetupCompleted (kFALSE)
354{
356// // constructor used for Testing + Application of the MVA,
357// // only (no training), using given WeightFiles
358}
359
360////////////////////////////////////////////////////////////////////////////////
361/// destructor
362
// NOTE(review): destructor signature line missing from this extraction; body
// verbatim. Releases every heap object this class owns: input-variable list,
// ranking, PDFs, splines, the per-type event collections, and the regression /
// multiclass return buffers.
364{
 365 // destructor
 366 if (!fSetupCompleted) Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
 367
 368 // destructor
 369 if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
 370 if (fRanking != 0) delete fRanking;
 371
 372 // PDFs
 373 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
 374 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
 375 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
 376
 377 // Splines
 378 if (fSplS) { delete fSplS; fSplS = 0; }
 379 if (fSplB) { delete fSplB; fSplB = 0; }
 380 if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
 381 if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
 382 if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
 383 if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
 384 if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
 385 if (fSplTrainEffBvsS) { delete fSplTrainEffBvsS; fSplTrainEffBvsS = 0; }
 386
 // Each collection owns its Event pointers: delete the events, then the vector.
 387 for (size_t i = 0; i < fEventCollections.size(); i++ ) {
 388 if (fEventCollections.at(i)) {
 389 for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
 390 it != fEventCollections.at(i)->end(); ++it) {
 391 delete (*it);
 392 }
 393 delete fEventCollections.at(i);
 394 fEventCollections.at(i) = nullptr;
 395 }
 396 }
 397
 398 if (fRegressionReturnVal) delete fRegressionReturnVal;
 399 if (fMulticlassReturnVal) delete fMulticlassReturnVal;
 400}
401
402////////////////////////////////////////////////////////////////////////////////
403/// setup of methods
404
// NOTE(review): signature line missing from this extraction; body verbatim.
// One-shot initialization: base init + base options, then the derived class's
// Init()/DeclareOptions(). Guarded so a second call is a fatal error.
406{
 407 // setup of methods
 408
 409 if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
 410 InitBase();
 411 DeclareBaseOptions();
 412 Init();
 413 DeclareOptions();
 414 fSetupCompleted = kTRUE;
 415}
416
417////////////////////////////////////////////////////////////////////////////////
418/// process all options
419/// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
420/// (sometimes, eg, fitters are used which can only be implemented during training phase)
421
// NOTE(review): signature line missing from this extraction; body verbatim.
// Parses base-class options first, then the derived class's own options.
// CheckForUnusedOptions is deliberately a separate call (see CheckSetup).
423{
 424 ProcessBaseOptions();
 425 ProcessOptions();
 426}
427
428////////////////////////////////////////////////////////////////////////////////
429/// check may be overridden by derived class
430/// (sometimes, eg, fitters are used which can only be implemented during training phase)
431
// NOTE(review): signature line missing from this extraction; body verbatim.
// Default check is only the unused-option scan; derived classes may override.
433{
 434 CheckForUnusedOptions();
 435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// default initialization called by all constructors
439
// NOTE(review): signature line missing from this extraction; body verbatim.
// Default initialization shared by all constructors: binning from the global
// config, sentinel statistics (-1 / +-DBL_MAX mean "not yet computed"),
// input-variable labels copied from the DataSetInfo, and signal/background
// class indices resolved by name.
441{
 442 SetConfigDescription( "Configuration options for classifier architecture and tuning" );
 443
 444 fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
 445 fNbinsMVAoutput = gConfig().fVariablePlotting.fNbinsMVAoutput;
 446 fNbinsH = NBIN_HIST_HIGH;
 447
 448 fSplTrainS = 0;
 449 fSplTrainB = 0;
 450 fSplTrainEffBvsS = 0;
 // -1 marks "not yet computed" for the cached mean/RMS statistics.
 451 fMeanS = -1;
 452 fMeanB = -1;
 453 fRmsS = -1;
 454 fRmsB = -1;
 // Extremes are seeded inverted so the first real value always updates them.
 455 fXmin = DBL_MAX;
 456 fXmax = -DBL_MAX;
 457 fTxtWeightsOnly = kTRUE;
 458 fSplRefS = 0;
 459 fSplRefB = 0;
 460
 461 fTrainTime = -1.;
 462 fTestTime = -1.;
 463
 464 fRanking = 0;
 465
 466 // temporary until the move to DataSet is complete
 467 fInputVars = new std::vector<TString>;
 468 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
 469 fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
 470 }
 471 fRegressionReturnVal = 0;
 472 fMulticlassReturnVal = 0;
 473
 // Two slots: one per event-collection type (presumably training/testing — confirm).
 474 fEventCollections.resize( 2 );
 475 fEventCollections.at(0) = 0;
 476 fEventCollections.at(1) = 0;
 477
 478 // retrieve signal and background class index
 479 if (DataInfo().GetClassInfo("Signal") != 0) {
 480 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
 481 }
 482 if (DataInfo().GetClassInfo("Background") != 0) {
 483 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
 484 }
 485
 // NOTE(review): this second SetConfigDescription overwrites the one set above.
 486 SetConfigDescription( "Configuration options for MVA method" );
 487 SetConfigName( TString("Method") + GetMethodTypeName() );
 488}
489
490////////////////////////////////////////////////////////////////////////////////
491/// define the options (their key words) that can be set in the option string
492/// here the options valid for ALL MVA methods are declared.
493///
494/// know options:
495///
496/// - VariableTransform=None,Decorrelated,PCA to use transformed variables
497/// instead of the original ones
498/// - VariableTransformType=Signal,Background which decorrelation matrix to use
499/// in the method. Only the Likelihood
500/// Method can make proper use of independent
501/// transformations of signal and background
502/// - fNbinsMVAPdf = 50 Number of bins used to create a PDF of MVA
503/// - fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
504/// - fHasMVAPdfs create PDFs for the MVA outputs
505/// - V for Verbose output (!V) for non verbose
506/// - H for Help message
507
// NOTE(review): signature line missing from this extraction; body verbatim.
// Declares the option keys common to ALL MVA methods (see the doc block above
// for the list). Values are bound by reference via DeclareOptionRef.
509{
 510 DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
 511
 512 DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
 513 AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
 514 AddPreDefVal( TString("Debug") );
 515 AddPreDefVal( TString("Verbose") );
 516 AddPreDefVal( TString("Info") );
 517 AddPreDefVal( TString("Warning") );
 518 AddPreDefVal( TString("Error") );
 519 AddPreDefVal( TString("Fatal") );
 520
 521 // If True (default): write all training results (weights) as text files only;
 522 // if False: write also in ROOT format (not available for all methods - will abort if not
 523 fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
 524 fNormalise = kFALSE; // OBSOLETE !!!
 525
 526 DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
 527
 528 DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
 529
 530 DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
 531
 532 DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
 533 "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
 534}
535
536////////////////////////////////////////////////////////////////////////////////
537/// the option string is decoded, for available options see "DeclareOptions"
538
// NOTE(review): signature line missing from this extraction; body verbatim.
// Decodes the base options: builds the chained MVA-output PDFs (default ->
// background -> signal, each consuming/marking the option string), installs
// the variable transformations, and maps the verbosity string onto the logger.
540{
 541 if (HasMVAPdfs()) {
 542 // setting the default bin num... maybe should be static ? ==> Please no static (JS)
 543 // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
 544 // reading every PDF's definition and passing the option string to the next one to be read and marked
 545 fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
 546 fDefaultPDF->DeclareOptions();
 547 fDefaultPDF->ParseOptions();
 548 fDefaultPDF->ProcessOptions();
 549 fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
 550 fMVAPdfB->DeclareOptions();
 551 fMVAPdfB->ParseOptions();
 552 fMVAPdfB->ProcessOptions();
 553 fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
 554 fMVAPdfS->DeclareOptions();
 555 fMVAPdfS->ParseOptions();
 556 fMVAPdfS->ProcessOptions();
 557
 558 // the final marked option string is written back to the original methodbase
 559 SetOptions( fMVAPdfS->GetOptions() );
 560 }
 561
 562 TMVA::CreateVariableTransforms( fVarTransformString,
 563 DataInfo(),
 564 GetTransformationHandler(),
 565 Log() );
 566
 // If PDFs were not requested, drop any leftovers from a previous configuration.
 567 if (!HasMVAPdfs()) {
 568 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
 569 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
 570 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
 571 }
 572
 573 if (fVerbose) { // overwrites other settings
 574 fVerbosityLevelString = TString("Verbose");
 575 Log().SetMinType( kVERBOSE );
 576 }
 577 else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
 578 else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
 579 else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
 580 else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
 581 else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
 582 else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
 583 else if (fVerbosityLevelString != "Default" ) {
 584 Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
 585 << fVerbosityLevelString << "' unknown." << Endl;
 586 }
 587 Event::SetIgnoreNegWeightsInTraining(fIgnoreNegWeightsInTraining);
 588}
589
590////////////////////////////////////////////////////////////////////////////////
591/// options that are used ONLY for the READER to ensure backward compatibility
592/// they are hence without any effect (the reader is only reading the training
593/// options that HAD been used at the training of the .xml weight file at hand
594
// NOTE(review): signature line missing from this extraction; body verbatim.
// Declares obsolete option keys so that OLD weight files still parse; the
// values are read but have no effect (reader-side backward compatibility).
596{
 597 DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
 598 DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
 // NOTE(review): "of, course" in the help string below looks like a typo for
 // ", of course" — left untouched here because it is a runtime string.
 599 DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
 600 "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
 601 AddPreDefVal( TString("Signal") );
 602 AddPreDefVal( TString("Background") );
 603 DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
 604 // Why on earth ?? was this here? Was the verbosity level option meant to 'disappear? Not a good idea i think..
 605 // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
 606 // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
 607 // AddPreDefVal( TString("Debug") );
 608 // AddPreDefVal( TString("Verbose") );
 609 // AddPreDefVal( TString("Info") );
 610 // AddPreDefVal( TString("Warning") );
 611 // AddPreDefVal( TString("Error") );
 612 // AddPreDefVal( TString("Fatal") );
 613 DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
 614 DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
 615}
616
617
618////////////////////////////////////////////////////////////////////////////////
619/// call the Optimizer with the set of parameters and ranges that
620/// are meant to be tuned.
621
622std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
623{
624 // this is just a dummy... needs to be implemented for each method
625 // individually (as long as we don't have it automatized via the
626 // configuration string
627
628 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
629 << GetName() << Endl;
630 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently we need to set hardcoded which parameter is tuned in which ranges"<<Endl;
631
632 return std::map<TString,Double_t>();
633}
634
635////////////////////////////////////////////////////////////////////////////////
636/// set the tuning parameters according to the argument
637/// This is just a dummy .. have a look at the MethodBDT how you could
638/// perhaps implement the same thing for the other Classifiers..
639
640void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
641{
 // Intentionally empty: base-class no-op. Methods that support parameter
 // tuning (e.g. MethodBDT) override this to apply the tuned values.
642}
643
644////////////////////////////////////////////////////////////////////////////////
645
// NOTE(review): signature line missing from this extraction; body verbatim.
// Orchestrates one full training pass: switch the dataset to training mode,
// compute the variable transformations, call the derived Train(), then create
// the per-analysis-type outputs (classification / multiclass / regression),
// persist the state if requested, and write monitoring histograms.
647{
 648 Data()->SetCurrentType(Types::kTraining);
 649 Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
 650
 651 // train the MVA method
 652 if (Help()) PrintHelpMessage();
 653
 654 // all histograms should be created in the method's subdirectory
 655 if(!IsSilentFile()) BaseDir()->cd();
 656
 657 // once calculate all the transformation (e.g. the sequence of Decorr:Gauss:Decorr)
 658 // needed for this classifier
 659 GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());
 660
 661 // call training of derived MVA
 662 Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
 663 << "Begin training" << Endl;
 664 Long64_t nEvents = Data()->GetNEvents();
 665 Timer traintimer( nEvents, GetName(), kTRUE );
 666 Train();
 667 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
 668 << "\tEnd of training " << Endl;
 669 SetTrainTime(traintimer.ElapsedSeconds());
 670 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
 671 << "Elapsed time for training with " << nEvents << " events: "
 672 << traintimer.GetElapsedTime() << " " << Endl;
 673
 674 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
 675 << "\tCreate MVA output for ";
 676
 677 // create PDFs for the signal and background MVA distributions (if required)
 678 if (DoMulticlass()) {
 679 Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
 680 AddMulticlassOutput(Types::kTraining);
 681 }
 682 else if (!DoRegression()) {
 683
 684 Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
 685 AddClassifierOutput(Types::kTraining);
 686 if (HasMVAPdfs()) {
 687 CreateMVAPdfs();
 688 AddClassifierOutputProb(Types::kTraining);
 689 }
 690
 691 } else {
 692
 693 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
 694 AddRegressionOutput( Types::kTraining );
 695
 696 if (HasMVAPdfs() ) {
 697 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
 698 CreateMVAPdfs();
 699 }
 700 }
 701
 702 // write the current MVA state into stream
 703 // produced are one text file and one ROOT file
 704 if (fModelPersistence ) WriteStateToFile();
 705
 706 // produce standalone make class (presently only supported for classification)
 707 if ((!DoRegression()) && (fModelPersistence)) MakeClass();
 708
 709 // write additional monitoring histograms to main target file (not the weight file)
 710 // again, make sure the histograms go into the method's subdirectory
 711 if(!IsSilentFile())
 712 {
 713 BaseDir()->cd();
 714 WriteMonitoringHistosToFile();
 715 }
 716}
717
718////////////////////////////////////////////////////////////////////////////////
719
// NOTE(review): signature line missing from this extraction; body verbatim.
// Computes the regression deviation (RMS of the quadratic deviation) for
// target tgtNum, both over all events (stddev) and truncated at the 90%
// quantile (stddev90Percent) — outputs returned through reference parameters.
// NOTE(review): Types::kTesting is hard-coded in the GetResults call even
// though `type` is logged above — looks suspicious; confirm against upstream
// whether `type` should be used instead.
721{
 722 if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
 723 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
 724 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), Types::kTesting, Types::kRegression);
 725 bool truncate = false;
 726 TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
 727 stddev = sqrt(h1->GetMean());
 728 truncate = true;
 // 90% quantile of the untruncated deviation defines the truncation cut.
 729 Double_t yq[1], xq[]={0.9};
 730 h1->GetQuantiles(1,yq,xq);
 731 TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
 732 stddev90Percent = sqrt(h2->GetMean());
 733 delete h1;
 734 delete h2;
 735}
736
737////////////////////////////////////////////////////////////////////////////////
738/// Get al regression values in one call
// NOTE(review): signature line missing from this extraction; body verbatim.
// Evaluates GetRegressionValues() for every event and returns the results
// flattened into a single vector of size nEvents*ntargets (event-major).
// NOTE(review): one line between original lines 749 and 751 was dropped by
// the scrape — presumably it computed drawProgressEvery from nEvents and
// totalProgressDraws; as shown, drawProgressEvery stays 1. Confirm upstream.
740{
 741 Long64_t nEvents = Data()->GetNEvents();
 742 // use timer
 743 Timer timer( nEvents, GetName(), kTRUE );
 744
 745 // Drawing the progress bar every event was causing a huge slowdown in the evaluation time
 746 // So we set some parameters to draw the progress bar a total of totalProgressDraws, i.e. only draw every 1 in 100
 747
 748 Int_t totalProgressDraws = 100; // total number of times to update the progress bar
 749 Int_t drawProgressEvery = 1; // draw every nth event such that we have a total of totalProgressDraws
 751
 752 size_t ntargets = Data()->GetEvent(0)->GetNTargets();
 753 std::vector<float> output(nEvents*ntargets);
 754 auto itr = output.begin();
 755 for (Int_t ievt=0; ievt<nEvents; ievt++) {
 756
 757 Data()->SetCurrentEvent(ievt);
 758 std::vector< Float_t > vals = GetRegressionValues();
 759 if (vals.size() != ntargets)
 760 Log() << kFATAL << "Output regression vector with size " << vals.size() << " is not consistent with target size of "
 761 << ntargets << std::endl;
 762
 763 std::copy(vals.begin(), vals.end(), itr);
 764 itr += vals.size();
 765
 766 // Only draw the progress bar once in a while, doing this every event causes the evaluation to be ridiculously slow
 767 if(ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.DrawProgressBar( ievt );
 768 }
 769
 770 return output;
 771}
772
773////////////////////////////////////////////////////////////////////////////////
774/// prepare tree branch with the method's discriminating variable
775
// NOTE(review): signature line missing from this extraction; body verbatim.
// Evaluates the regression values for the given tree type and stores them in
// the ResultsRegression object, then creates the deviation histograms.
// NOTE(review): several interior lines were dropped by the scrape — in
// particular the declaration of `regValuesEnd` (used below) and, presumably,
// the per-iteration advance of regValuesBegin/regValuesEnd plus the progress
// bar update (original lines ~798/810-812). As shown, this body would not
// compile; confirm against upstream before editing.
777{
 778 Data()->SetCurrentType(type);
 779
 780 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
 781
 782 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), type, Types::kRegression);
 783
 784 Long64_t nEvents = Data()->GetNEvents();
 785
 786 // use timer
 787 Timer timer( nEvents, GetName(), kTRUE );
 788
 789 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
 790 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
 791
 792 regRes->Resize( nEvents );
 793
 794 std::vector<float> output = GetAllRegressionValues();
 795 // assume we have all number of targets for all events
 796 Data()->SetCurrentEvent(0);
 797 size_t nTargets = GetEvent()->GetNTargets();
 799 auto regValuesBegin = output.begin();
 800
 801 if (output.size() != nTargets * size_t(nEvents))
 802 Log() << kFATAL << "Output regression vector with size " << output.size() << " is not consistent with target size of "
 803 << nTargets << " and number of events " << nEvents << std::endl;
 804
 805
 806 for (Int_t ievt=0; ievt<nEvents; ievt++) {
 807
 808 std::vector< Float_t > vals(regValuesBegin, regValuesEnd);
 809 regRes->SetValue( vals, ievt );
 810
 813 }
 814
 815 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
 816 << "Elapsed time for evaluation of " << nEvents << " events: "
 817 << timer.GetElapsedTime() << " " << Endl;
 818
 819 // store time used for testing
 821 SetTestTime(timer.ElapsedSeconds());
 822
 823 TString histNamePrefix(GetTestvarName());
 824 histNamePrefix += (type==Types::kTraining?"train":"test");
 825 regRes->CreateDeviationHistograms( histNamePrefix );
 826}
827////////////////////////////////////////////////////////////////////////////////
828/// Get all multi-class values
// NOTE(review): signature line missing from this extraction; body verbatim.
// Evaluates GetMulticlassValues() for every event; the first event's result
// fixes the number of classes and sizes the flattened output vector
// (nEvents * nClasses, event-major).
830{
 831 // use timer for progress bar
 832
 833 Long64_t nEvents = Data()->GetNEvents();
 834 Timer timer( nEvents, GetName(), kTRUE );
 835
 // Update the progress bar roughly 100 times regardless of sample size.
 836 Int_t modulo = Int_t(nEvents/100) + 1;
 837 // call first time to get number of classes
 838 Data()->SetCurrentEvent(0);
 839 std::vector< Float_t > vals = GetMulticlassValues();
 840 std::vector<float> output(nEvents * vals.size());
 841 auto itr = output.begin();
 842 std::copy(vals.begin(), vals.end(), itr);
 843 for (Int_t ievt=1; ievt<nEvents; ievt++) {
 844 itr += vals.size();
 845 Data()->SetCurrentEvent(ievt);
 846 vals = GetMulticlassValues();
 847
 848 std::copy(vals.begin(), vals.end(), itr);
 849
 850 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
 851 }
 852 return output;
 853}
854////////////////////////////////////////////////////////////////////////////////
855/// prepare tree branch with the method's discriminating variable
856
// NOTE(review): signature line missing from this extraction; body verbatim.
// Stores the per-event multiclass response vectors in the ResultsMulticlass
// object for the given tree type and books the corresponding histograms.
858{
 859 Data()->SetCurrentType(type);
 860
 861 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
 862
 863 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
 864 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
 865
 866 Long64_t nEvents = Data()->GetNEvents();
 867
 868 // use timer
 869 Timer timer( nEvents, GetName(), kTRUE );
 870
 871 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
 872 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
 873
 874 resMulticlass->Resize( nEvents );
 // Flattened output is event-major: slice [ievt*nClasses, (ievt+1)*nClasses).
 875 std::vector<Float_t> output = GetAllMulticlassValues();
 876 size_t nClasses = output.size()/nEvents;
 877 for (Int_t ievt=0; ievt<nEvents; ievt++) {
 878 std::vector< Float_t > vals(output.begin()+ievt*nClasses, output.begin()+(ievt+1)*nClasses);
 879 resMulticlass->SetValue( vals, ievt );
 880 }
 881
 882
 883 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
 884 << "Elapsed time for evaluation of " << nEvents << " events: "
 885 << timer.GetElapsedTime() << " " << Endl;
 886
 887 // store time used for testing
 889 SetTestTime(timer.ElapsedSeconds());
 890
 891 TString histNamePrefix(GetTestvarName());
 892 histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
 893
 894 resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
 895 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
 896}
897
898////////////////////////////////////////////////////////////////////////////////
/// default error calculation for methods that provide no MVA error estimate:
/// the optional error outputs are flagged with the sentinel value -1
899
901   if (err) *err=-1;            // lower/symmetric error: not computed
902   if (errUpper) *errUpper=-1;  // upper error: not computed
903}
904
905////////////////////////////////////////////////////////////////////////////////
/// evaluate the MVA response for an externally supplied event: the event is
/// temporarily installed in the fTmpEvent slot (read downstream during the
/// evaluation) and cleared again before returning
906
908   fTmpEvent = ev;
909   Double_t val = GetMvaValue(err, errUpper);
910   fTmpEvent = 0;   // reset so subsequent calls use the current data set event again
911   return val;
912}
913
914////////////////////////////////////////////////////////////////////////////////
915/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
916/// for a quick determination if an event would be selected as signal or background
917
/// multiplying both sides of the comparison by the cut orientation
/// (presumably +-1 -- see SetSignalReferenceCutOrientation) flips the
/// inequality for methods whose signal lies below the reference cut
919   return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
920}
921////////////////////////////////////////////////////////////////////////////////
922/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
923/// for a quick determination if an event with this mva output value would be selected as signal or background
924
/// same cut logic as IsSignalLike(), but applied to a caller-provided value
/// instead of evaluating the method on the current event
926   return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
927}
928
929////////////////////////////////////////////////////////////////////////////////
930/// prepare tree branch with the method's discriminating variable
/// (classification flavour: evaluates the method on all events of the requested
/// tree type and stores value plus true signal/background flag in the results)
931
933{
934   Data()->SetCurrentType(type);
935
   // fetch (or create on demand) the classification results container
937      (ResultsClassification*)Data()->GetResults(GetMethodName(), type, Types::kClassification );
938
939   Long64_t nEvents = Data()->GetNEvents();
940   clRes->Resize( nEvents );
941
942   // use timer
943   Timer timer( nEvents, GetName(), kTRUE );
944
945   Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
946         << "Evaluation of " << GetMethodName() << " on "
947         << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
948         << " sample (" << nEvents << " events)" << Endl;
949
   // evaluate the whole sample in one go (logProgress=true draws the progress bar)
950   std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);
951
952   Log() << kINFO
953         << "Elapsed time for evaluation of " << nEvents << " events: "
954         << timer.GetElapsedTime() << " " << Endl;
955
956   // store time used for testing
958   SetTestTime(timer.ElapsedSeconds());
959
960   // load mva values and type to results object
961   for (Int_t ievt = 0; ievt < nEvents; ievt++) {
962      // note we do not need the transformed event to get the signal/background information
963      // by calling Data()->GetEvent instead of this->GetEvent we access the untransformed one
964      auto ev = Data()->GetEvent(ievt);
965      clRes->SetValue(mvaValues[ievt], ievt, DataInfo().IsSignal(ev));
966   }
967}
968
969////////////////////////////////////////////////////////////////////////////////
970/// get all the MVA values for the events of the current Data type
/// in the half-open range [firstEvt, lastEvt); the range is clamped to the
/// available number of events before use
972{
973
974   Long64_t nEvents = Data()->GetNEvents();
   // clamp the requested range to [0, nEvents)
975   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
976   if (firstEvt < 0) firstEvt = 0;
977   std::vector<Double_t> values(lastEvt-firstEvt);
978   // log in case of looping on all the events
979   nEvents = values.size();
980
981   // use timer
982   Timer timer( nEvents, GetName(), kTRUE );
983
984   if (logProgress)
985      Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
986            << "Evaluation of " << GetMethodName() << " on "
987            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
988            << " sample (" << nEvents << " events)" << Endl;
989
990   for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
991      Data()->SetCurrentEvent(ievt);
      // NOTE(review): indexing with ievt assumes firstEvt == 0; for firstEvt > 0
      // this writes past values.size() -- confirm intended usage by callers
992      values[ievt] = GetMvaValue();
993
994      // print progress
995      if (logProgress) {
996         Int_t modulo = Int_t(nEvents/100);
997         if (modulo <= 0 ) modulo = 1;
998         if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
999      }
1000   }
1001   if (logProgress) {
1002      Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1003            << "Elapsed time for evaluation of " << nEvents << " events: "
1004            << timer.GetElapsedTime() << " " << Endl;
1005   }
1006
1007   return values;
1008}
1009
1010////////////////////////////////////////////////////////////////////////////////
1011/// get all the MVA values for the events of the given Data type
1012// (this is used by Method Category and it does not need to be re-implemented by derived classes )
1014{
   // temporarily install the external data set in fTmpData for the duration
   // of the evaluation, then restore the null state
1015   fTmpData = data;
1016   auto result = GetMvaValues(firstEvt, lastEvt, logProgress);
1017   fTmpData = nullptr;
1018   return result;
1019}
1020
1021////////////////////////////////////////////////////////////////////////////////
1022/// prepare tree branch with the method's discriminating variable
/// (probability flavour: stores P(signal|MVA) per event in a results
/// container keyed with a "prob_" prefix)
1023
1025{
1026   Data()->SetCurrentType(type);
1027
   // probability results are stored under a "prob_"-prefixed key
1029      (ResultsClassification*)Data()->GetResults(TString("prob_")+GetMethodName(), type, Types::kClassification );
1030
1031   Long64_t nEvents = Data()->GetNEvents();
1032
1033   // use timer
1034   Timer timer( nEvents, GetName(), kTRUE );
1035
1036   Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
1037         << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
1038
1039   mvaProb->Resize( nEvents );
1040   Int_t modulo = Int_t(nEvents/100);
1041   if (modulo <= 0 ) modulo = 1;
1042   for (Int_t ievt=0; ievt<nEvents; ievt++) {
1043
1044      Data()->SetCurrentEvent(ievt);
      // 0.5: assumed a-priori signal fraction passed to GetProba -- TODO confirm
1045      Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
      // a negative probability flags an error (presumably missing MVA PDFs);
      // in that case give up on the whole loop
1046      if (proba < 0) break;
1047      mvaProb->SetValue( proba, ievt, DataInfo().IsSignal( Data()->GetEvent()) );
1048
1049      // print progress
1050      if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
1051   }
1052
1053   Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
1054         << "Elapsed time for evaluation of " << nEvents << " events: "
1055         << timer.GetElapsedTime() << " " << Endl;
1056}
1057
1058////////////////////////////////////////////////////////////////////////////////
1059/// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
1060///
1061/// - bias = average deviation
1062/// - dev = average absolute deviation
1063/// - rms = rms of deviation
///
/// the "T" variants (biasT, devT, rmsT) are truncated versions computed from
/// events whose deviation lies within +-2*rms of the bias; corr is the
/// weighted linear correlation between target and regression estimate, and
/// mInf the mutual information of the (estimate, target) distribution
1064
1069                                     Double_t& corr,
1071{
   // remember the active tree type so it can be restored before returning
1072   Types::ETreeType savedType = Data()->GetCurrentType();
1073   Data()->SetCurrentType(type);
1074
1075   bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
1076   Double_t sumw = 0;
1077   Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
1078   const Int_t nevt = GetNEvents();
   // per-event buffers needed for the second (truncated) pass
1079   Float_t* rV = new Float_t[nevt];
1080   Float_t* tV = new Float_t[nevt];
1081   Float_t* wV = new Float_t[nevt];
1082   Float_t xmin = 1e30, xmax = -1e30;
1083   Log() << kINFO << "Calculate regression for all events" << Endl;
1084   Timer timer( nevt, GetName(), kTRUE );
1085   Long64_t modulo = Long64_t(nevt / 100) + 1;
1086   auto output = GetAllRegressionValues();
1087   int ntargets = Data()->GetEvent(0)->GetNTargets();
   // first pass: accumulate weighted moments of the deviation d = r - t
1088   for (Long64_t ievt=0; ievt<nevt; ievt++) {
1089      const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
1090      Float_t t = ev->GetTarget(0);
1091      Float_t w = ev->GetWeight();
1093      Float_t d = (r-t);
1094
1095      // find min/max
1098
1099      // store for truncated RMS computation
1100      rV[ievt] = r;
1101      tV[ievt] = t;
1102      wV[ievt] = w;
1103
1104      // compute deviation-squared
1105      sumw += w;
1106      bias += w * d;
1107      dev += w * TMath::Abs(d);
1108      rms += w * d * d;
1109
1110      // compute correlation between target and regression estimate
1111      m1 += t*w; s1 += t*t*w;
1112      m2 += r*w; s2 += r*r*w;
1113      s12 += t*r;
1114      // print progress
1115      if (ievt % modulo == 0)
1116         timer.DrawProgressBar(ievt);
1117   }
1118   timer.DrawProgressBar(nevt - 1);
1119   Log() << kINFO << "Elapsed time for evaluation of " << nevt << " events: "
1120         << timer.GetElapsedTime() << " " << Endl;
1121
1122   // standard quantities
1123   bias /= sumw;
1124   dev /= sumw;
1125   rms /= sumw;
1127
1128   // correlation
1129   m1 /= sumw;
1130   m2 /= sumw;
1131   corr = s12/sumw - m1*m2;
1132   corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1133
1134   // create histogram required for computation of mutual information
1135   TH2F* hist = new TH2F( "hist", "hist", 150, xmin, xmax, 100, xmin, xmax );
1136   TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );
1137
1138   // compute truncated RMS and fill histogram
   // second pass: only events with deviation inside [bias-2*rms, bias+2*rms]
   // enter the truncated quantities and the truncated histogram
1139   Double_t devMax = bias + 2*rms;
1140   Double_t devMin = bias - 2*rms;
1141   sumw = 0;
1142   for (Long64_t ievt=0; ievt<nevt; ievt++) {
1143      Float_t d = (rV[ievt] - tV[ievt]);
1144      hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
1145      if (d >= devMin && d <= devMax) {
1146         sumw += wV[ievt];
1147         biasT += wV[ievt] * d;
1148         devT += wV[ievt] * TMath::Abs(d);
1149         rmsT += wV[ievt] * d * d;
1150         histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
1151      }
1152   }
1153   biasT /= sumw;
1154   devT /= sumw;
1155   rmsT /= sumw;
1157   mInf = gTools().GetMutualInformation( *hist );
1159
1160   delete hist;
1161   delete histT;
1162
1163   delete [] rV;
1164   delete [] tV;
1165   delete [] wV;
1166
   // restore the tree type that was active on entry
1167   Data()->SetCurrentType(savedType);
1168}
1169
1170
1171////////////////////////////////////////////////////////////////////////////////
1172/// test multiclass classification
/// (creates the multiclass response and performance histograms for both the
/// test and the training sample from the stored multiclass results)
1173
1175{
1176   ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
1177   if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1178
1179   // GA evaluation of best cut for sig eff * sig pur. Slow, disabled for now.
1180   // Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test
1181   // data..." << Endl; for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1182   // resMulticlass->GetBestMultiClassCuts(icls);
1183   // }
1184
1185   // Create histograms for use in TMVA GUI
1186   TString histNamePrefix(GetTestvarName());
1189
   // separate histogram sets for the test and the training sample
1190   resMulticlass->CreateMulticlassHistos(histNamePrefixTest, fNbinsMVAoutput, fNbinsH);
1191   resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTest);
1192
1193   resMulticlass->CreateMulticlassHistos(histNamePrefixTrain, fNbinsMVAoutput, fNbinsH);
1194   resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTrain);
1195}
1196
1197
1198////////////////////////////////////////////////////////////////////////////////
1199/// initialization
/// of the classification test: computes basic statistics of the stored MVA
/// response, fills signal/background response, probability and rarity
/// histograms, and builds the response PDFs used later for separation plots
1200
1202{
1203   Data()->SetCurrentType(Types::kTesting);
1204
1206      ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
1207
1208   // sanity checks: tree must exist, and theVar must be in tree
1209   if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
1210      Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
1211           << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
1212      Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
1213            << " not found in tree" << Endl;
1214   }
1215
1216   // basic statistics operations are made in base class
1217   gTools().ComputeStat( GetEventCollection(Types::kTesting), mvaRes->GetValueVector(),
1218                         fMeanS, fMeanB, fRmsS, fRmsB, fXmin, fXmax, fSignalClass );
1219
1220   // choose reasonable histogram ranges, by removing outliers
1221   Double_t nrms = 10;
1222   fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
1223   fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );
1224
1225   // determine cut orientation
   // signal mean above background mean -> larger response is more signal-like
1226   fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;
1227
1228   // fill 2 types of histograms for the various analyses
1229   // this one is for actual plotting
1230
   // small offset so that the maximum falls into the last bin
1231   Double_t sxmax = fXmax+0.00001;
1232
1233   // classifier response distributions for training sample
1234   // MVA plots used for graphics representation (signal)
1236   if(IsSilentFile()) {
1237      TestvarName = TString::Format("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1238   } else {
1239      TestvarName=GetTestvarName();
1240   }
1241   TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
1242   TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
1243   mvaRes->Store(mva_s, "MVA_S");
1244   mvaRes->Store(mva_b, "MVA_B");
1245   mva_s->Sumw2();
1246   mva_b->Sumw2();
1247
   // probability and rarity histograms are only produced when MVA PDFs exist
1248   TH1* proba_s = 0;
1249   TH1* proba_b = 0;
1250   TH1* rarity_s = 0;
1251   TH1* rarity_b = 0;
1252   if (HasMVAPdfs()) {
1253      // P(MVA) plots used for graphics representation
1254      proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1255      proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1256      mvaRes->Store(proba_s, "Prob_S");
1257      mvaRes->Store(proba_b, "Prob_B");
1258      proba_s->Sumw2();
1259      proba_b->Sumw2();
1260
1261      // R(MVA) plots used for graphics representation
1262      rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1263      rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1264      mvaRes->Store(rarity_s, "Rar_S");
1265      mvaRes->Store(rarity_b, "Rar_B");
1266      rarity_s->Sumw2();
1267      rarity_b->Sumw2();
1268   }
1269
1270   // MVA plots used for efficiency calculations (large number of bins)
1271   TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
1272   TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
1273   mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
1274   mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
1275   mva_eff_s->Sumw2();
1276   mva_eff_b->Sumw2();
1277
1278   // fill the histograms
1279
1281      (Data()->GetResults( TString("prob_")+GetMethodName(), Types::kTesting, Types::kMaxAnalysisType ) );
1282
1283   Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
1284   if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
1285   //std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();
1286
1287   //LM: this is needed to avoid crashes in ROOCCURVE
1288   if ( mvaRes->GetSize() != GetNEvents() ) {
1289      Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
1290      assert(mvaRes->GetSize() == GetNEvents());
1291   }
1292
   // route every stored response into the signal or background histogram set
1293   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1294
1295      const Event* ev = GetEvent(ievt);
1296      Float_t v = (*mvaRes)[ievt][0];
1297      Float_t w = ev->GetWeight();
1298
1299      if (DataInfo().IsSignal(ev)) {
1300         //mvaResTypes->push_back(kTRUE);
1301         mva_s ->Fill( v, w );
1302         if (mvaProb) {
1303            proba_s->Fill( (*mvaProb)[ievt][0], w );
1304            rarity_s->Fill( GetRarity( v ), w );
1305         }
1306
1307         mva_eff_s ->Fill( v, w );
1308      }
1309      else {
1310         //mvaResTypes->push_back(kFALSE);
1311         mva_b ->Fill( v, w );
1312         if (mvaProb) {
1313            proba_b->Fill( (*mvaProb)[ievt][0], w );
1314            rarity_b->Fill( GetRarity( v ), w );
1315         }
1316         mva_eff_b ->Fill( v, w );
1317      }
1318   }
1319
1320   // uncomment those (and several others if you want unnormalized output
1321   gTools().NormHist( mva_s );
1322   gTools().NormHist( mva_b );
1323   gTools().NormHist( proba_s );
1324   gTools().NormHist( proba_b );
1329
1330   // create PDFs from histograms, using default splines, and no additional smoothing
1331   if (fSplS) { delete fSplS; fSplS = 0; }
1332   if (fSplB) { delete fSplB; fSplB = 0; }
1333   fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
1334   fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
1335}
1336
1337////////////////////////////////////////////////////////////////////////////////
1338/// general method used in writing the header of the weight files where
1339/// the used variables, variable transformation type etc. is specified
1340
1341void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1342{
1343   TString prefix = "";
1345
   // #GEN section: general bookkeeping about this training
1346   tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1347   tf << prefix << "Method : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1348   tf.setf(std::ios::left);
1349   tf << prefix << "TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
1350      << GetTrainingTMVAVersionCode() << "]" << std::endl;
1351   tf << prefix << "ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
1352      << GetTrainingROOTVersionCode() << "]" << std::endl;
1353   tf << prefix << "Creator : " << userInfo->fUser << std::endl;
1354   tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1355   tf << prefix << "Host : " << gSystem->GetBuildNode() << std::endl;
1356   tf << prefix << "Dir : " << gSystem->WorkingDirectory() << std::endl;
1357   tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1358
   // NOTE(review): this local analysisType is never used below -- the stream
   // line recomputes the same expression inline
1359   TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1360
1361   tf << prefix << "Analysis type : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1362   tf << prefix << std::endl;
1363
1364   delete userInfo;
1365
1366   // First write all options
1367   tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1368   WriteOptionsToStream( tf, prefix );
1369   tf << prefix << std::endl;
1370
1371   // Second write variable info
1372   tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1373   WriteVarsToStream( tf, prefix );
1374   tf << prefix << std::endl;
1375}
1376
1377////////////////////////////////////////////////////////////////////////////////
1378/// xml writing
1379
1380void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1381{
1382 void* it = gTools().AddChild(gi,"Info");
1383 gTools().AddAttr(it,"name", name);
1384 gTools().AddAttr(it,"value", value);
1385}
1386
1387////////////////////////////////////////////////////////////////////////////////
/// create the results output for the requested tree type according to the
/// analysis type; classification additionally gets probability output when
/// the method provides MVA PDFs
1388
1390   if (analysisType == Types::kRegression) {
1391      AddRegressionOutput( type );
1392   } else if (analysisType == Types::kMulticlass) {
1393      AddMulticlassOutput( type );
1394   } else {
      // default: two-class classification
1395      AddClassifierOutput( type );
1396      if (HasMVAPdfs())
1397         AddClassifierOutputProb( type );
1398   }
1399}
1400
1401////////////////////////////////////////////////////////////////////////////////
1402/// general method used in writing the header of the weight files where
1403/// the used variables, variable transformation type etc. is specified
/// (XML flavour: appends GeneralInfo, options, variables, spectators,
/// classes/targets, transformations, MVA PDFs and weights to `parent`)
1404
1405void TMVA::MethodBase::WriteStateToXML( void* parent ) const
1406{
1407   if (!parent) return;
1408
1410
   // general training bookkeeping
1411   void* gi = gTools().AddChild(parent, "GeneralInfo");
1412   AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
1413   AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
1414   AddInfoItem( gi, "Creator", userInfo->fUser);
1415   TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
1416   AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
1417   AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
1418   AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
1419   AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));
1420
1421   Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
1422   TString analysisType((aType==Types::kRegression) ? "Regression" :
1423                        (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
1424   AddInfoItem( gi, "AnalysisType", analysisType );
1425   delete userInfo;
1426
1427   // write options
1428   AddOptionsXMLTo( parent );
1429
1430   // write variable info
1431   AddVarsXMLTo( parent );
1432
1433   // write spectator info
1434   if (fModelPersistence)
1435      AddSpectatorsXMLTo( parent );
1436
1437   // write class info if in multiclass mode
1438   AddClassesXMLTo(parent);
1439
1440   // write target info if in regression mode
1441   if (DoRegression()) AddTargetsXMLTo(parent);
1442
1443   // write transformations
1444   GetTransformationHandler(false).AddXMLTo( parent );
1445
1446   // write MVA variable distributions
1447   void* pdfs = gTools().AddChild(parent, "MVAPdfs");
1448   if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1449   if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1450
1451   // write weights
1452   AddWeightsXMLTo( parent );
1453}
1454
1455////////////////////////////////////////////////////////////////////////////////
1456/// read reference MVA distributions (and other information)
1457/// from a ROOT type weight file
1458
1460{
1462   TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
   // retrieve the stored MVA PDFs (may be null if absent in the file)
1463   fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1464   fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1465
1467
   // delegate the method-specific weights to the derived class
1468   ReadWeightsFromStream( rf );
1469
1470   SetTestvarName();
1471}
1472
1473////////////////////////////////////////////////////////////////////////////////
1474/// write options and weights to file
1475/// note that one text file for the main configuration information
1476/// and one ROOT file for ROOT objects are created
1477
1479{
1480   // ---- create the text file
1481   TString tfname( GetWeightFileName() );
1482
1483   // writing xml file
   // the xml weight file shares the base name of the text weight file
1484   TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1485   Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1486         << "Creating xml weight file: "
1487         << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1488   void* doc = gTools().xmlengine().NewDoc();
1489   void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1490   gTools().xmlengine().DocSetRootElement(doc,rootnode);
1491   gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1492   WriteStateToXML(rootnode);
1495}
1496
1497////////////////////////////////////////////////////////////////////////////////
1498/// Function to read options and weights from file
/// (dispatches on the weight-file name: ".xml" files go through the XML
/// reader, anything else through the legacy text-stream reader; an optional
/// companion ".root" file is read when fTxtWeightsOnly is false)
1499
1501{
1502   // get the filename
1503
1504   TString tfname(GetWeightFileName());
1505
1506   Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1507         << "Reading weight file: "
1508         << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1509
1510   if (tfname.EndsWith(".xml") ) {
1511      void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1512      if (!doc) {
1513         Log() << kFATAL << "Error parsing XML file " << tfname << Endl;
1514      }
1515      void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1516      ReadStateFromXML(rootnode);
1518   }
1519   else {
      // legacy plain-text weight file
1520      std::filebuf fb;
1521      fb.open(tfname.Data(),std::ios::in);
1522      if (!fb.is_open()) { // file not found --> Error
1523         Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1524               << "Unable to open input weight file: " << tfname << Endl;
1525      }
1526      std::istream fin(&fb);
1527      ReadStateFromStream(fin);
1528      fb.close();
1529   }
1530   if (!fTxtWeightsOnly) {
1531      // ---- read the ROOT file
1532      TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1533      Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1534            << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1535      TFile* rfile = TFile::Open( rfname, "READ" );
1536      ReadStateFromStream( *rfile );
1537      rfile->Close();
1538   }
1539}
1540////////////////////////////////////////////////////////////////////////////////
1541/// for reading from memory
/// (parses the given XML string and delegates to ReadStateFromXML on the
/// resulting "MethodSetup" root node)
1542
1544   void* doc = gTools().xmlengine().ParseString(xmlstr);
1545   void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1546   ReadStateFromXML(rootnode);
1548
1549   return;
1550}
1551
1552////////////////////////////////////////////////////////////////////////////////
/// read the complete method state (general info, options, variables,
/// spectators, classes/targets, transformations, MVA PDFs and weights)
/// from an XML "MethodSetup" node
1553
1555{
1556
1558   gTools().ReadAttr( methodNode, "Method", fullMethodName );
1559
   // strip the "Type::" prefix to obtain the plain method name
1560   fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1561
1562   // update logger
1563   Log().SetSource( GetName() );
1564   Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1565         << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1566
1567   // after the method name is read, the testvar can be set
1568   SetTestvarName();
1569
   // walk over all children of the MethodSetup node and dispatch on their name
1570   TString nodeName("");
1571   void* ch = gTools().GetChild(methodNode);
1572   while (ch!=0) {
1573      nodeName = TString( gTools().GetName(ch) );
1574
1575      if (nodeName=="GeneralInfo") {
1576         // read analysis type
1577
1578         TString name(""),val("");
1579         void* antypeNode = gTools().GetChild(ch);
1580         while (antypeNode) {
1581            gTools().ReadAttr( antypeNode, "name", name );
1582
1583            if (name == "TrainingTime")
1584               gTools().ReadAttr( antypeNode, "value", fTrainTime );
1585
1586            if (name == "AnalysisType") {
1587               gTools().ReadAttr( antypeNode, "value", val );
1588               val.ToLower();
1589               if (val == "regression" ) SetAnalysisType( Types::kRegression );
1590               else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1591               else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1592               else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1593            }
1594
1595            if (name == "TMVA Release" || name == "TMVA") {
1596               TString s;
1597               gTools().ReadAttr( antypeNode, "value", s);
               // the version code is the integer between the square brackets
1598               fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1599               Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1600            }
1601
1602            if (name == "ROOT Release" || name == "ROOT") {
1603               TString s;
1604               gTools().ReadAttr( antypeNode, "value", s);
1605               fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1606               Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1607                     << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1608            }
1610         }
1611      }
1612      else if (nodeName=="Options") {
1613         ReadOptionsFromXML(ch);
1614         ParseOptions();
1615
1616      }
1617      else if (nodeName=="Variables") {
1618         ReadVariablesFromXML(ch);
1619      }
1620      else if (nodeName=="Spectators") {
1621         ReadSpectatorsFromXML(ch);
1622      }
1623      else if (nodeName=="Classes") {
1624         if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1625      }
1626      else if (nodeName=="Targets") {
1627         if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1628      }
1629      else if (nodeName=="Transformations") {
1630         GetTransformationHandler().ReadFromXML(ch);
1631      }
1632      else if (nodeName=="MVAPdfs") {
         // replace any previously loaded PDFs
1634         if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1635         if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1636         void* pdfnode = gTools().GetChild(ch);
1637         if (pdfnode) {
1638            gTools().ReadAttr(pdfnode, "Name", pdfname);
1639            fMVAPdfS = new PDF(pdfname);
1640            fMVAPdfS->ReadXML(pdfnode);
1642            gTools().ReadAttr(pdfnode, "Name", pdfname);
1643            fMVAPdfB = new PDF(pdfname);
1644            fMVAPdfB->ReadXML(pdfnode);
1645         }
1646      }
1647      else if (nodeName=="Weights") {
1648         ReadWeightsFromXML(ch);
1649      }
1650      else {
1651         Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1652      }
1653      ch = gTools().GetNextChild(ch);
1654
1655   }
1656
1657   // update transformation handler
1658   if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1659}
1660
1661////////////////////////////////////////////////////////////////////////////////
1662/// read the header from the weight files of the different MVA methods
1663
1665{
1666 char buf[512];
1667
1668 // when reading from stream, we assume the files are produced with TMVA<=397
1669 SetAnalysisType(Types::kClassification);
1670
1671
1672 // first read the method name
1673 GetLine(fin,buf);
1674 while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1675 TString namestr(buf);
1676
1677 TString methodType = namestr(0,namestr.Index("::"));
1678 methodType = methodType(methodType.Last(' '),methodType.Length());
1680
1681 TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1682 methodName = methodName.Strip(TString::kLeading);
1683 if (methodName == "") methodName = methodType;
1684 fMethodName = methodName;
1685
1686 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1687
1688 // update logger
1689 Log().SetSource( GetName() );
1690
1691 // now the question is whether to read the variables first or the options (well, of course the order
1692 // of writing them needs to agree)
1693 //
1694 // the option "Decorrelation" is needed to decide if the variables we
1695 // read are decorrelated or not
1696 //
1697 // the variables are needed by some methods (TMLP) to build the NN
1698 // which is done in ProcessOptions so for the time being we first Read and Parse the options then
1699 // we read the variables, and then we process the options
1700
1701 // now read all options
1702 GetLine(fin,buf);
1703 while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1704 ReadOptionsFromStream(fin);
1705 ParseOptions();
1706
1707 // Now read variable info
1708 fin.getline(buf,512);
1709 while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1710 ReadVarsFromStream(fin);
1711
1712 // now we process the options (of the derived class)
1713 ProcessOptions();
1714
1715 if (IsNormalised()) {
1717 GetTransformationHandler().AddTransformation( new VariableNormalizeTransform(DataInfo()), -1 );
1718 norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1719 }
1721 if ( fVarTransformString == "None") {
1722 if (fUseDecorr)
1723 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1724 } else if ( fVarTransformString == "Decorrelate" ) {
1725 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1726 } else if ( fVarTransformString == "PCA" ) {
1727 varTrafo = GetTransformationHandler().AddTransformation( new VariablePCATransform(DataInfo()), -1 );
1728 } else if ( fVarTransformString == "Uniform" ) {
1729 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1730 } else if ( fVarTransformString == "Gauss" ) {
1731 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1732 } else if ( fVarTransformString == "GaussDecorr" ) {
1733 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1734 varTrafo2 = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1735 } else {
1736 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1737 << fVarTransformString << "' unknown." << Endl;
1738 }
1739 // Now read decorrelation matrix if available
1740 if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1741 fin.getline(buf,512);
1742 while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1743 if (varTrafo) {
1744 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1745 varTrafo->ReadTransformationFromStream(fin, trafo );
1746 }
1747 if (varTrafo2) {
1748 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1749 varTrafo2->ReadTransformationFromStream(fin, trafo );
1750 }
1751 }
1752
1753
1754 if (HasMVAPdfs()) {
1755 // Now read the MVA PDFs
1756 fin.getline(buf,512);
1757 while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1758 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1759 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1760 fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1761 fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1762 fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
1763 fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
1764
1765 fin >> *fMVAPdfS;
1766 fin >> *fMVAPdfB;
1767 }
1768
1769 // Now read weights
1770 fin.getline(buf,512);
1771 while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1772 fin.getline(buf,512);
1773 ReadWeightsFromStream( fin );
1774
1775 // update transformation handler
1776 if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1777
1778}
1779
1780////////////////////////////////////////////////////////////////////////////////
1781/// write the list of variables (name, min, max) for a given data
1782/// transformation method to the stream
1783
1784void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1785{
1786 o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1787 std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1788 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1789 o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1790 varIt = DataInfo().GetSpectatorInfos().begin();
1791 for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1792}
1793
1794////////////////////////////////////////////////////////////////////////////////
1795/// Read the variables (name, min, max) for a given data
1796/// transformation method from the stream. In the stream we only
1797/// expect the limits which will be set
1798
1800{
1801 TString dummy;
1803 istr >> dummy >> readNVar;
1804
1805 if (readNVar!=DataInfo().GetNVariables()) {
1806 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1807 << " while there are " << readNVar << " variables declared in the file"
1808 << Endl;
1809 }
1810
1811 // we want to make sure all variables are read in the order they are defined
1813 std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1814 int varIdx = 0;
1815 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
1816 varInfo.ReadFromStream(istr);
1817 if (varIt->GetExpression() == varInfo.GetExpression()) {
1818 varInfo.SetExternalLink((*varIt).GetExternalLink());
1819 (*varIt) = varInfo;
1820 }
1821 else {
1822 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1823 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1824 Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1825 Log() << kINFO << "the correct working of the method):" << Endl;
1826 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1827 Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1828 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1829 }
1830 }
1831}
1832
1833////////////////////////////////////////////////////////////////////////////////
1834/// write variable info to XML
1835
1836void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1837{
1838 void* vars = gTools().AddChild(parent, "Variables");
1839 gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1840
1841 for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1842 VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1843 void* var = gTools().AddChild( vars, "Variable" );
1844 gTools().AddAttr( var, "VarIndex", idx );
1845 vi.AddToXML( var );
1846 }
1847}
1848
1849////////////////////////////////////////////////////////////////////////////////
1850/// write spectator info to XML
1851
1853{
1854 void* specs = gTools().AddChild(parent, "Spectators");
1855
1856 UInt_t writeIdx=0;
1857 for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1858
1859 VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1860
1861 // we do not want to write spectators that are category-cuts,
1862 // except if the method is the category method and the spectators belong to it
1863 if (vi.GetVarType()=='C') continue;
1864
1865 void* spec = gTools().AddChild( specs, "Spectator" );
1866 gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1867 vi.AddToXML( spec );
1868 }
1869 gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1870}
1871
1872////////////////////////////////////////////////////////////////////////////////
1873/// write class info to XML
1874
1875void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1876{
1877 UInt_t nClasses=DataInfo().GetNClasses();
1878
1879 void* classes = gTools().AddChild(parent, "Classes");
1880 gTools().AddAttr( classes, "NClass", nClasses );
1881
1882 for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1883 ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1884 TString className =classInfo->GetName();
1885 UInt_t classNumber=classInfo->GetNumber();
1886
1887 void* classNode=gTools().AddChild(classes, "Class");
1888 gTools().AddAttr( classNode, "Name", className );
1889 gTools().AddAttr( classNode, "Index", classNumber );
1890 }
1891}
1892////////////////////////////////////////////////////////////////////////////////
1893/// write target info to XML
1894
1895void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1896{
1897 void* targets = gTools().AddChild(parent, "Targets");
1898 gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1899
1900 for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1901 VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1902 void* tar = gTools().AddChild( targets, "Target" );
1903 gTools().AddAttr( tar, "TargetIndex", idx );
1904 vi.AddToXML( tar );
1905 }
1906}
1907
1908////////////////////////////////////////////////////////////////////////////////
1909/// read variable info from XML
1910
1912{
1914 gTools().ReadAttr( varnode, "NVar", readNVar);
1915
1916 if (readNVar!=DataInfo().GetNVariables()) {
1917 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1918 << " while there are " << readNVar << " variables declared in the file"
1919 << Endl;
1920 }
1921
1922 // we want to make sure all variables are read in the order they are defined
1924 int varIdx = 0;
1925 void* ch = gTools().GetChild(varnode);
1926 while (ch) {
1927 gTools().ReadAttr( ch, "VarIndex", varIdx);
1928 existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1929 readVarInfo.ReadFromXML(ch);
1930
1931 if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1932 readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
1934 }
1935 else {
1936 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1937 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1938 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1939 Log() << kINFO << "correct working of the method):" << Endl;
1940 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1941 Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1942 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1943 }
1944 ch = gTools().GetNextChild(ch);
1945 }
1946}
1947
1948////////////////////////////////////////////////////////////////////////////////
1949/// read spectator info from XML
1950
1952{
1954 gTools().ReadAttr( specnode, "NSpec", readNSpec);
1955
1956 if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1957 Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1958 << " while there are " << readNSpec << " spectators declared in the file"
1959 << Endl;
1960 }
1961
1962 // we want to make sure all variables are read in the order they are defined
1964 int specIdx = 0;
1965 void* ch = gTools().GetChild(specnode);
1966 while (ch) {
1967 gTools().ReadAttr( ch, "SpecIndex", specIdx);
1968 existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1969 readSpecInfo.ReadFromXML(ch);
1970
1971 if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1972 readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
1974 }
1975 else {
1976 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1977 Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1978 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1979 Log() << kINFO << "correct working of the method):" << Endl;
1980 Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1981 Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1982 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1983 }
1984 ch = gTools().GetNextChild(ch);
1985 }
1986}
1987
1988////////////////////////////////////////////////////////////////////////////////
1989/// read number of classes from XML
1990
1992{
1994 // coverity[tainted_data_argument]
1995 gTools().ReadAttr( clsnode, "NClass", readNCls);
1996
1997 TString className="";
1999 void* ch = gTools().GetChild(clsnode);
2000 if (!ch) {
2001 for (UInt_t icls = 0; icls<readNCls;++icls) {
2002 TString classname = TString::Format("class%i",icls);
2003 DataInfo().AddClass(classname);
2004
2005 }
2006 }
2007 else{
2008 while (ch) {
2009 gTools().ReadAttr( ch, "Index", classIndex);
2010 gTools().ReadAttr( ch, "Name", className );
2011 DataInfo().AddClass(className);
2012
2013 ch = gTools().GetNextChild(ch);
2014 }
2015 }
2016
2017 // retrieve signal and background class index
2018 if (DataInfo().GetClassInfo("Signal") != 0) {
2019 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
2020 }
2021 else
2022 fSignalClass=0;
2023 if (DataInfo().GetClassInfo("Background") != 0) {
2024 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
2025 }
2026 else
2027 fBackgroundClass=1;
2028}
2029
2030////////////////////////////////////////////////////////////////////////////////
2031/// read target info from XML
2032
2034{
2036 gTools().ReadAttr( tarnode, "NTrgt", readNTar);
2037
2038 int tarIdx = 0;
2039 TString expression;
2040 void* ch = gTools().GetChild(tarnode);
2041 while (ch) {
2042 gTools().ReadAttr( ch, "TargetIndex", tarIdx);
2043 gTools().ReadAttr( ch, "Expression", expression);
2044 DataInfo().AddTarget(expression,"","",0,0);
2045
2046 ch = gTools().GetNextChild(ch);
2047 }
2048}
2049
2050////////////////////////////////////////////////////////////////////////////////
2051/// returns the ROOT directory where info/histograms etc of the
2052/// corresponding MVA method instance are stored
2053
2055{
2056 if (fBaseDir != 0) return fBaseDir;
2057 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
2058
2059 if (IsSilentFile()) {
2060 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2061 << "MethodBase::BaseDir() - No directory exists when running a Method without output file. Enable the "
2062 "output when creating the factory"
2063 << Endl;
2064 }
2065
2066 TDirectory* methodDir = MethodBaseDir();
2067 if (methodDir==0)
2068 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" << Endl;
2069
2070 TString defaultDir = GetMethodName();
2071 TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
2072 if(!sdir)
2073 {
2074 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet--> created it" <<Endl;
2075 sdir = methodDir->mkdir(defaultDir);
2076 sdir->cd();
2077 // write weight file name into target file
2078 if (fModelPersistence) {
2080 TObjString wfileName( GetWeightFileName() );
2081 wfilePath.Write( "TrainingPath" );
2082 wfileName.Write( "WeightFileName" );
2083 }
2084 }
2085
2086 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
2087 return sdir;
2088}
2089
2090////////////////////////////////////////////////////////////////////////////////
2091/// returns the ROOT directory where all instances of the
2092/// corresponding MVA method are stored
2093
2095{
2096 if (fMethodBaseDir != 0) {
2097 return fMethodBaseDir;
2098 }
2099
2100 const char *datasetName = DataInfo().GetName();
2101
2102 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodTypeName()
2103 << " not set yet --> check if already there.." << Endl;
2104
2105 TDirectory *factoryBaseDir = GetFile();
2106 if (!factoryBaseDir) return nullptr;
2107 fMethodBaseDir = factoryBaseDir->GetDirectory(datasetName);
2108 if (!fMethodBaseDir) {
2109 fMethodBaseDir = factoryBaseDir->mkdir(datasetName, TString::Format("Base directory for dataset %s", datasetName).Data());
2110 if (!fMethodBaseDir) {
2111 Log() << kFATAL << "Can not create dir " << datasetName;
2112 }
2113 }
2114 TString methodTypeDir = TString::Format("Method_%s", GetMethodTypeName().Data());
2115 fMethodBaseDir = fMethodBaseDir->GetDirectory(methodTypeDir.Data());
2116
2117 if (!fMethodBaseDir) {
2119 TString methodTypeDirHelpStr = TString::Format("Directory for all %s methods", GetMethodTypeName().Data());
2120 fMethodBaseDir = datasetDir->mkdir(methodTypeDir.Data(), methodTypeDirHelpStr);
2121 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodName()
2122 << " does not exist yet--> created it" << Endl;
2123 }
2124
2125 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName)
2126 << "Return from MethodBaseDir() after creating base directory " << Endl;
2127 return fMethodBaseDir;
2128}
2129
2130////////////////////////////////////////////////////////////////////////////////
2131/// set directory of weight file
2132
2134{
2135 fFileDir = fileDir;
2136 gSystem->mkdir( fFileDir, kTRUE );
2137}
2138
2139////////////////////////////////////////////////////////////////////////////////
 2140 /// set the weight file name (deprecated)
2141
2146
2147////////////////////////////////////////////////////////////////////////////////
2148/// retrieve weight file name
2149
2151{
2152 if (fWeightFile!="") return fWeightFile;
2153
2154 // the default consists of
2155 // directory/jobname_methodname_suffix.extension.{root/txt}
2156 TString suffix = "";
2157 TString wFileDir(GetWeightFileDir());
2158 TString wFileName = GetJobName() + "_" + GetMethodName() +
2159 suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml";
2160 if (wFileDir.IsNull() ) return wFileName;
2161 // add weight file directory of it is not null
2162 return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2163 + wFileName );
2164}
2165////////////////////////////////////////////////////////////////////////////////
2166/// writes all MVA evaluation histograms to file
2167
2169{
2170 BaseDir()->cd();
2171
2172
2173 // write MVA PDFs to file - if exist
2174 if (0 != fMVAPdfS) {
2175 fMVAPdfS->GetOriginalHist()->Write();
2176 fMVAPdfS->GetSmoothedHist()->Write();
2177 fMVAPdfS->GetPDFHist()->Write();
2178 }
2179 if (0 != fMVAPdfB) {
2180 fMVAPdfB->GetOriginalHist()->Write();
2181 fMVAPdfB->GetSmoothedHist()->Write();
2182 fMVAPdfB->GetPDFHist()->Write();
2183 }
2184
2185 // write result-histograms
2186 Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2187 if (!results)
2188 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2189 << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2190 << "/kMaxAnalysisType" << Endl;
2191 results->GetStorage()->Write();
2193 // skipping plotting of variables if too many (default is 200)
2194 if ((int) DataInfo().GetNVariables()< gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables)
2195 GetTransformationHandler().PlotVariables (GetEventCollection( Types::kTesting ), BaseDir() );
2196 else
2197 Log() << kINFO << TString::Format("Dataset[%s] : ",DataInfo().GetName())
2198 << " variable plots are not produces ! The number of variables is " << DataInfo().GetNVariables()
2199 << " , it is larger than " << gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables << Endl;
2200 }
2201}
2202
2203////////////////////////////////////////////////////////////////////////////////
2204/// write special monitoring histograms to file
2205/// dummy implementation here -----------------
2206
2210
2211////////////////////////////////////////////////////////////////////////////////
2212/// reads one line from the input stream
2213/// checks for certain keywords and interprets
2214/// the line if keywords are found
2215
2216Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2217{
2218 fin.getline(buf,512);
2219 TString line(buf);
2220 if (line.BeginsWith("TMVA Release")) {
2221 Ssiz_t start = line.First('[')+1;
2222 Ssiz_t length = line.Index("]",start)-start;
2223 TString code = line(start,length);
2224 std::stringstream s(code.Data());
2225 s >> fTMVATrainingVersion;
2226 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2227 }
2228 if (line.BeginsWith("ROOT Release")) {
2229 Ssiz_t start = line.First('[')+1;
2230 Ssiz_t length = line.Index("]",start)-start;
2231 TString code = line(start,length);
2232 std::stringstream s(code.Data());
2233 s >> fROOTTrainingVersion;
2234 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2235 }
2236 if (line.BeginsWith("Analysis type")) {
2237 Ssiz_t start = line.First('[')+1;
2238 Ssiz_t length = line.Index("]",start)-start;
2239 TString code = line(start,length);
2240 std::stringstream s(code.Data());
2241 std::string analysisType;
2242 s >> analysisType;
2243 if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2244 else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2245 else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2246 else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2247
2248 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2249 << (GetAnalysisType() == Types::kRegression ? "Regression" :
2250 (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2251 }
2252
2253 return true;
2254}
2255
2256////////////////////////////////////////////////////////////////////////////////
2257/// Create PDFs of the MVA output variables
2258
2260{
2261 Data()->SetCurrentType(Types::kTraining);
2262
2263 // the PDF's are stored as results ONLY if the corresponding "results" are booked,
2264 // otherwise they will be only used 'online'
2266 ( Data()->GetResults(GetMethodName(), Types::kTraining, Types::kClassification) );
2267
2268 if (mvaRes==0 || mvaRes->GetSize()==0) {
2269 Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2270 }
2271
2272 Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2273 Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2274
2275 // create histograms that serve as basis to create the MVA Pdfs
2276 TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2277 fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2278 TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2279 fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2280
2281
2282 // compute sum of weights properly
2283 histMVAPdfS->Sumw2();
2284 histMVAPdfB->Sumw2();
2285
2286 // fill histograms
2287 for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2288 Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2289 Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2290
2291 if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2292 else histMVAPdfB->Fill( theVal, theWeight );
2293 }
2294
2297
2298 // momentary hack for ROOT problem
2299 if(!IsSilentFile())
2300 {
2301 histMVAPdfS->Write();
2302 histMVAPdfB->Write();
2303 }
2304 // create PDFs
2305 fMVAPdfS->BuildPDF ( histMVAPdfS );
2306 fMVAPdfB->BuildPDF ( histMVAPdfB );
2307 fMVAPdfS->ValidatePDF( histMVAPdfS );
2308 fMVAPdfB->ValidatePDF( histMVAPdfB );
2309
2310 if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2311 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2312 << TString::Format( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2313 GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2314 << Endl;
2315 }
2316
2317 delete histMVAPdfS;
2318 delete histMVAPdfB;
2319}
2320
// Returns P(signal | MVA value) for the given event: creates the MVA PDFs
// on demand and uses the training-sample signal weight fraction as the
// prior, then delegates to GetProba(mvaVal, sigFraction).
// NOTE(review): the signature/opening brace (line 2321) is collapsed in
// this extract.
 2322 // the simple one, automatically calculates the mvaVal and uses the
 2323 // SAME sig/bkg ratio as given in the training sample (typically 50/50
 2324 // .. (NormMode=EqualNumEvents) but can be different)
 2325 if (!fMVAPdfS || !fMVAPdfB) {
 2326 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
 2327 CreateMVAPdfs();
 2328 }
 2329 Double_t sigFraction = DataInfo().GetTrainingSumSignalWeights() / (DataInfo().GetTrainingSumSignalWeights() + DataInfo().GetTrainingSumBackgrWeights() );
 2330 Double_t mvaVal = GetMvaValue(ev);
 2331
 2332 return GetProba(mvaVal,sigFraction);
 2333
 2334 }
2335////////////////////////////////////////////////////////////////////////////////
2336/// compute likelihood ratio
2337
2339{
2340 if (!fMVAPdfS || !fMVAPdfB) {
2341 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2342 return -1.0;
2343 }
2344 Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2345 Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2346
2347 Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2348
2349 return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2350}
2351
2352////////////////////////////////////////////////////////////////////////////////
2353/// compute rarity:
2354/// \f[
2355/// R(x) = \int_{[-\infty..x]} { PDF(x') dx' }
2356/// \f]
2357/// where PDF(x) is the PDF of the classifier's signal or background distribution
2358
2360{
2361 if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2362 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2363 << "select option \"CreateMVAPdfs\"" << Endl;
2364 return 0.0;
2365 }
2366
2367 PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2368
2369 return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2370}
2371
2372////////////////////////////////////////////////////////////////////////////////
2373/// fill background efficiency (resp. rejection) versus signal efficiency plots
2374/// returns signal efficiency at background efficiency indicated in theString
2375
2377{
2378 Data()->SetCurrentType(type);
2379 Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
2380 std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2381
2382 // parse input string for required background efficiency
2384
2385 // sanity check
2387 if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2388 else if (list->GetSize() > 2) {
2389 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2390 << " in string: " << theString
2391 << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2392 delete list;
2393 return -1;
2394 }
2395
2396 // sanity check
2397 if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2398 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2399 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2400 delete list;
2401 return -1.0;
2402 }
2403
2404 // create histograms
2405
2406 // first, get efficiency histograms for signal and background
2407 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2408 Double_t xmin = effhist->GetXaxis()->GetXmin();
2409 Double_t xmax = effhist->GetXaxis()->GetXmax();
2410
2412
2413 // first round ? --> create histograms
2414 if (results->DoesExist("MVA_EFF_S")==0) {
2415
2416 // for efficiency plot
2417 TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2418 TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2419 results->Store(eff_s, "MVA_EFF_S");
2420 results->Store(eff_b, "MVA_EFF_B");
2421
2422 // sign if cut
2423 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2424
2425 // this method is unbinned
2426 nevtS = 0;
2427 for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2428
2429 // read the tree
2430 Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2431 Float_t theWeight = GetEvent(ievt)->GetWeight();
2432 Float_t theVal = (*mvaRes)[ievt];
2433
2434 // select histogram depending on if sig or bgd
2436
2437 // count signal and background events in tree
2438 if (isSignal) nevtS+=theWeight;
2439
2440 TAxis* axis = theHist->GetXaxis();
2441 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2442 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2443 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2444 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2445 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2446
2447 if (sign > 0)
2448 for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2449 else if (sign < 0)
2450 for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2451 else
2452 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2453 }
2454
2455 // renormalise maximum to <=1
2456 // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2457 // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2458
2459 eff_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_s->GetMaximum()) );
2460 eff_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_b->GetMaximum()) );
2461
2462 // background efficiency versus signal efficiency
2463 TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2464 results->Store(eff_BvsS, "MVA_EFF_BvsS");
2465 eff_BvsS->SetXTitle( "Signal eff" );
2466 eff_BvsS->SetYTitle( "Backgr eff" );
2467
2468 // background rejection (=1-eff.) versus signal efficiency
2469 TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2470 results->Store(rej_BvsS);
2471 rej_BvsS->SetXTitle( "Signal eff" );
2472 rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2473
2474 // inverse background eff (1/eff.) versus signal efficiency
2475 TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2476 GetTestvarName(), fNbins, 0, 1 );
2477 results->Store(inveff_BvsS);
2478 inveff_BvsS->SetXTitle( "Signal eff" );
2479 inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2480
2481 // use root finder
2482 // spline background efficiency plot
2483 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2485 fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2486 fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2487
2488 // verify spline sanity
2489 gTools().CheckSplines( eff_s, fSplRefS );
2490 gTools().CheckSplines( eff_b, fSplRefB );
2491 }
2492
2493 // make the background-vs-signal efficiency plot
2494
2495 // create root finder
2496 RootFinder rootFinder( this, fXmin, fXmax );
2497
2498 Double_t effB = 0;
2499 fEffS = eff_s; // to be set for the root finder
2500 for (Int_t bini=1; bini<=fNbins; bini++) {
2501
2502 // find cut value corresponding to a given signal efficiency
2503 Double_t effS = eff_BvsS->GetBinCenter( bini );
2504 Double_t cut = rootFinder.Root( effS );
2505
2506 // retrieve background efficiency for given cut
2507 if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2508 else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2509
2510 // and fill histograms
2511 eff_BvsS->SetBinContent( bini, effB );
2512 rej_BvsS->SetBinContent( bini, 1.0-effB );
2513 if (effB>std::numeric_limits<double>::epsilon())
2514 inveff_BvsS->SetBinContent( bini, 1.0/effB );
2515 }
2516
2517 // create splines for histogram
2518 fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2519
2520 // search for overlap point where, when cutting on it,
2521 // one would obtain: eff_S = rej_B = 1 - eff_B
2522 Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2523 Int_t nbins_ = 5000;
2524 for (Int_t bini=1; bini<=nbins_; bini++) {
2525
2526 // get corresponding signal and background efficiencies
2527 effS = (bini - 0.5)/Float_t(nbins_);
2528 rejB = 1.0 - fSpleffBvsS->Eval( effS );
2529
2530 // find signal efficiency that corresponds to required background efficiency
2531 if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2532 effS_ = effS;
2533 rejB_ = rejB;
2534 }
2535
2536 // find cut that corresponds to signal efficiency and update signal-like criterion
2537 Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2538 SetSignalReferenceCut( cut );
2539 fEffS = 0;
2540 }
2541
2542 // must exist...
2543 if (0 == fSpleffBvsS) {
2544 delete list;
2545 return 0.0;
2546 }
2547
2548 // now find signal efficiency that corresponds to required background efficiency
2549 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2550 Int_t nbins_ = 1000;
2551
2552 if (computeArea) {
2553
2554 // compute area of rej-vs-eff plot
2555 Double_t integral = 0;
2556 for (Int_t bini=1; bini<=nbins_; bini++) {
2557
2558 // get corresponding signal and background efficiencies
2559 effS = (bini - 0.5)/Float_t(nbins_);
2560 effB = fSpleffBvsS->Eval( effS );
2561 integral += (1.0 - effB);
2562 }
2563 integral /= nbins_;
2564
2565 delete list;
2566 return integral;
2567 }
2568 else {
2569
2570 // that will be the value of the efficiency returned (it does not affect
2571 // the efficiency-vs-bkg plot, which is done anyway)
2572 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2573
2574 // find precise efficiency value
2575 for (Int_t bini=1; bini<=nbins_; bini++) {
2576
2577 // get corresponding signal and background efficiencies
2578 effS = (bini - 0.5)/Float_t(nbins_);
2579 effB = fSpleffBvsS->Eval( effS );
2580
2581 // find signal efficiency that corresponds to required background efficiency
2582 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2583 effS_ = effS;
2584 effB_ = effB;
2585 }
2586
2587 // take mean between bin above and bin below
2588 effS = 0.5*(effS + effS_);
2589
2590 effSerr = 0;
2591 if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2592
2593 delete list;
2594 return effS;
2595 }
2596
2597 return -1;
2598}
2599
2600////////////////////////////////////////////////////////////////////////////////
2601
2603{
2604 Data()->SetCurrentType(Types::kTraining);
2605
2606 Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, Types::kNoAnalysisType);
2607
2608 // fill background efficiency (resp. rejection) versus signal efficiency plots
2609 // returns signal efficiency at background efficiency indicated in theString
2610
2611 // parse input string for required background efficiency
2613 // sanity check
2614
2615 if (list->GetSize() != 2) {
2616 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2617 << " in string: " << theString
2618 << " | required format, e.g., Efficiency:0.05" << Endl;
2619 delete list;
2620 return -1;
2621 }
2622 // that will be the value of the efficiency retured (does not affect
2623 // the efficiency-vs-bkg plot which is done anyway.
2624 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2625
2626 delete list;
2627
2628 // sanity check
2629 if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2630 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2631 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2632 << Endl;
2633 return -1.0;
2634 }
2635
2636 // create histogram
2637
2638 // first, get efficiency histograms for signal and background
2639 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2640 Double_t xmin = effhist->GetXaxis()->GetXmin();
2641 Double_t xmax = effhist->GetXaxis()->GetXmax();
2642
2643 // first round ? --> create and fill histograms
2644 if (results->DoesExist("MVA_TRAIN_S")==0) {
2645
2646 // classifier response distributions for test sample
2647 Double_t sxmax = fXmax+0.00001;
2648
2649 // MVA plots on the training sample (check for overtraining)
2650 TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2651 TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2652 results->Store(mva_s_tr, "MVA_TRAIN_S");
2653 results->Store(mva_b_tr, "MVA_TRAIN_B");
2654 mva_s_tr->Sumw2();
2655 mva_b_tr->Sumw2();
2656
2657 // Training efficiency plots
2658 TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2659 fNbinsH, xmin, xmax );
2660 TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2661 fNbinsH, xmin, xmax );
2662 results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2663 results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2664
2665 // sign if cut
2666 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2667
2668 std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2669 assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2670
2671 // this method is unbinned
2672 for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2673
2674 Data()->SetCurrentEvent(ievt);
2675 const Event* ev = GetEvent();
2676
2677 Double_t theVal = mvaValues[ievt];
2678 Double_t theWeight = ev->GetWeight();
2679
2680 TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2681 TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2682
2683 theClsHist->Fill( theVal, theWeight );
2684
2685 TAxis* axis = theEffHist->GetXaxis();
2686 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2687 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2688 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2689 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2690 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2691
2692 if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2693 else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2694 }
2695
2696 // normalise output distributions
2697 // uncomment those (and several others if you want unnormalized output
2700
2701 // renormalise to maximum
2702 mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2703 mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2704
2705 // Training background efficiency versus signal efficiency
2706 TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2707 // Training background rejection (=1-eff.) versus signal efficiency
2708 TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2709 results->Store(eff_bvss, "EFF_BVSS_TR");
2710 results->Store(rej_bvss, "REJ_BVSS_TR");
2711
2712 // use root finder
2713 // spline background efficiency plot
2714 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2716 if (fSplTrainRefS) delete fSplTrainRefS;
2717 if (fSplTrainRefB) delete fSplTrainRefB;
2718 fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2719 fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2720
2721 // verify spline sanity
2722 gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2723 gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2724 }
2725
2726 // make the background-vs-signal efficiency plot
2727
2728 // create root finder
2729 RootFinder rootFinder(this, fXmin, fXmax );
2730
2731 Double_t effB = 0;
2732 fEffS = results->GetHist("MVA_TRAINEFF_S");
2733 for (Int_t bini=1; bini<=fNbins; bini++) {
2734
2735 // find cut value corresponding to a given signal efficiency
2736 Double_t effS = eff_bvss->GetBinCenter( bini );
2737
2738 Double_t cut = rootFinder.Root( effS );
2739
2740 // retrieve background efficiency for given cut
2741 if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2742 else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2743
2744 // and fill histograms
2745 eff_bvss->SetBinContent( bini, effB );
2746 rej_bvss->SetBinContent( bini, 1.0-effB );
2747 }
2748 fEffS = 0;
2749
2750 // create splines for histogram
2751 fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2752 }
2753
2754 // must exist...
2755 if (0 == fSplTrainEffBvsS) return 0.0;
2756
2757 // now find signal efficiency that corresponds to required background efficiency
2758 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2759 Int_t nbins_ = 1000;
2760 for (Int_t bini=1; bini<=nbins_; bini++) {
2761
2762 // get corresponding signal and background efficiencies
2763 effS = (bini - 0.5)/Float_t(nbins_);
2764 effB = fSplTrainEffBvsS->Eval( effS );
2765
2766 // find signal efficiency that corresponds to required background efficiency
2767 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2768 effS_ = effS;
2769 effB_ = effB;
2770 }
2771
2772 return 0.5*(effS + effS_); // the mean between bin above and bin below
2773}
2774
2775////////////////////////////////////////////////////////////////////////////////
2776
2777 std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2778 {
 // Return the achievable per-class efficiencies on the TEST sample and append
 // the corresponding achievable purities to 'purity' (out-parameter).
2779 Data()->SetCurrentType(Types::kTesting);
2780 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
 // kFATAL is presumed to abort, so resMulticlass is non-null below — TODO confirm
2781 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2782 
2783 purity.push_back(resMulticlass->GetAchievablePur());
2784 return resMulticlass->GetAchievableEff();
2785 }
2786
2787////////////////////////////////////////////////////////////////////////////////
2788
2789 std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2790 {
 // Same as GetMulticlassEfficiency, but evaluated on the TRAINING sample;
 // additionally (re-)determines the optimal multiclass cuts per class first.
2791 Data()->SetCurrentType(Types::kTraining);
2792 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMulticlass));
 // NOTE(review): unlike the sibling GetMulticlassEfficiency, this message lacks
 // the "Dataset[...]" prefix — consider unifying.
2793 if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2794 
2795 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2796 for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2797 resMulticlass->GetBestMultiClassCuts(icls);
2798 }
2799 
2800 purity.push_back(resMulticlass->GetAchievablePur());
2801 return resMulticlass->GetAchievableEff();
2802 }
2803
2804////////////////////////////////////////////////////////////////////////////////
2805/// Construct a confusion matrix for a multiclass classifier. The confusion
2806 /// matrix compares, in turn, each class against all other classes in a pair-wise
2807/// fashion. In rows with index \f$ k_r = 0 ... K \f$, \f$ k_r \f$ is
2808/// considered signal for the sake of comparison and for each column
2809/// \f$ k_c = 0 ... K \f$ the corresponding class is considered background.
2810///
2811/// Note that the diagonal elements will be returned as NaN since this will
2812/// compare a class against itself.
2813///
2814/// \see TMVA::ResultsMulticlass::GetConfusionMatrix
2815///
2816/// \param[in] effB The background efficiency for which to evaluate.
2817/// \param[in] type The data set on which to evaluate (training, testing ...).
2818///
2819/// \return A matrix containing signal efficiencies for the given background
2820/// efficiency. The diagonal elements are NaN since this measure is
2821/// meaningless (comparing a class against itself).
2822///
2823
2825{
2826 if (GetAnalysisType() != Types::kMulticlass) {
2827 Log() << kFATAL << "Cannot get confusion matrix for non-multiclass analysis." << std::endl;
2828 return TMatrixD(0, 0);
2829 }
2830
2831 Data()->SetCurrentType(type);
2833 dynamic_cast<ResultsMulticlass *>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
2834
2835 if (resMulticlass == nullptr) {
2836 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2837 << "unable to create pointer in GetMulticlassEfficiency, exiting." << Endl;
2838 return TMatrixD(0, 0);
2839 }
2840
2841 return resMulticlass->GetConfusionMatrix(effB);
2842}
2843
2844////////////////////////////////////////////////////////////////////////////////
2845/// compute significance of mean difference
2846/// \f[
2847 /// significance = \frac{|\langle S \rangle - \langle B \rangle|}{\sqrt{RMS_S^2 + RMS_B^2}}
2848/// \f]
2849
2851{
2852 Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2853
2854 return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2855}
2856
2857////////////////////////////////////////////////////////////////////////////////
2858/// compute "separation" defined as
2859/// \f[
2860 /// \langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2861/// \f]
2862
2867
2868////////////////////////////////////////////////////////////////////////////////
2869/// compute "separation" defined as
2870/// \f[
2871 /// \langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2872/// \f]
2873
2875{
2876 // note, if zero pointers given, use internal pdf
2877 // sanity check first
2878 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2879 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2880 if (!pdfS) pdfS = fSplS;
2881 if (!pdfB) pdfB = fSplB;
2882
2883 if (!fSplS || !fSplB) {
2884 Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2885 << " fSplS or fSplB are not yet filled" << Endl;
2886 return 0;
2887 }else{
2888 return gTools().GetSeparation( *pdfS, *pdfB );
2889 }
2890}
2891
2892////////////////////////////////////////////////////////////////////////////////
2893/// calculate the area (integral) under the ROC curve as a
2894/// overall quality measure of the classification
2895
2897{
2898 // note, if zero pointers given, use internal pdf
2899 // sanity check first
2900 if ((!histS && histB) || (histS && !histB))
2901 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2902
2903 if (histS==0 || histB==0) return 0.;
2904
2905 TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2906 TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2907
2908
2909 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2910 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2911
2912 Double_t integral = 0;
2913 UInt_t nsteps = 1000;
2914 Double_t step = (xmax-xmin)/Double_t(nsteps);
2915 Double_t cut = xmin;
2916 for (UInt_t i=0; i<nsteps; i++) {
2917 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2918 cut+=step;
2919 }
2920 delete pdfS;
2921 delete pdfB;
2922 return integral*step;
2923}
2924
2925
2926////////////////////////////////////////////////////////////////////////////////
2927/// calculate the area (integral) under the ROC curve as a
2928/// overall quality measure of the classification
2929
2931{
2932 // note, if zero pointers given, use internal pdf
2933 // sanity check first
2934 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2935 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2936 if (!pdfS) pdfS = fSplS;
2937 if (!pdfB) pdfB = fSplB;
2938
2939 if (pdfS==0 || pdfB==0) return 0.;
2940
2941 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2942 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2943
2944 Double_t integral = 0;
2945 UInt_t nsteps = 1000;
2946 Double_t step = (xmax-xmin)/Double_t(nsteps);
2947 Double_t cut = xmin;
2948 for (UInt_t i=0; i<nsteps; i++) {
2949 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2950 cut+=step;
2951 }
2952 return integral*step;
2953}
2954
2955////////////////////////////////////////////////////////////////////////////////
2956/// plot significance, \f$ \frac{S}{\sqrt{S^2 + B^2}} \f$, curve for given number
2957/// of signal and background events; returns cut for maximum significance
2958/// also returned via reference is the maximum significance
2959
2963{
2964 Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, Types::kMaxAnalysisType );
2965
2967 Double_t effS(0),effB(0),significance(0);
2968 TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2969
2970 if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2971 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2972 << "Number of signal or background events is <= 0 ==> abort"
2973 << Endl;
2974 }
2975
2976 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2978
2979 TH1* eff_s = results->GetHist("MVA_EFF_S");
2980 TH1* eff_b = results->GetHist("MVA_EFF_B");
2981
2982 if ( (eff_s==0) || (eff_b==0) ) {
2983 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2984 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2985 return 0;
2986 }
2987
2988 for (Int_t bin=1; bin<=fNbinsH; bin++) {
2989 effS = eff_s->GetBinContent( bin );
2990 effB = eff_b->GetBinContent( bin );
2991
2992 // put significance into a histogram
2994
2995 temp_histogram->SetBinContent(bin,significance);
2996 }
2997
2998 // find maximum in histogram
2999 max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
3000 max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
3001
3002 // delete
3003 delete temp_histogram;
3004
3005 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
3006 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
3007
3008 return max_significance;
3009}
3010
3011////////////////////////////////////////////////////////////////////////////////
3012/// calculates rms,mean, xmin, xmax of the event variable
3013/// this can be either done for the variables as they are or for
3014/// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
3015
3020{
3021 Types::ETreeType previousTreeType = Data()->GetCurrentType();
3022 Data()->SetCurrentType(treeType);
3023
3024 Long64_t entries = Data()->GetNEvents();
3025
3026 // sanity check
3027 if (entries <=0)
3028 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;
3029
3030 // index of the wanted variable
3031 UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
3032
3033 // first fill signal and background in arrays before analysis
3034 xmin = +DBL_MAX;
3035 xmax = -DBL_MAX;
3036
3037 // take into account event weights
3038 meanS = 0;
3039 meanB = 0;
3040 rmsS = 0;
3041 rmsB = 0;
3042 Double_t sumwS = 0, sumwB = 0;
3043
3044 // loop over all training events
3045 for (Int_t ievt = 0; ievt < entries; ievt++) {
3046
3047 const Event* ev = GetEvent(ievt);
3048
3049 Double_t theVar = ev->GetValue(varIndex);
3050 Double_t weight = ev->GetWeight();
3051
3052 if (DataInfo().IsSignal(ev)) {
3053 sumwS += weight;
3054 meanS += weight*theVar;
3055 rmsS += weight*theVar*theVar;
3056 }
3057 else {
3058 sumwB += weight;
3059 meanB += weight*theVar;
3060 rmsB += weight*theVar*theVar;
3061 }
3062 xmin = TMath::Min( xmin, theVar );
3063 xmax = TMath::Max( xmax, theVar );
3064 }
3065
3066 meanS = meanS/sumwS;
3067 meanB = meanB/sumwB;
3070
3071 Data()->SetCurrentType(previousTreeType);
3072}
3073
3074////////////////////////////////////////////////////////////////////////////////
3075/// create reader class for method (classification only at present)
3076
3078{
3079 // the default consists of
3081 if (theClassFileName == "")
3082 classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
3083 else
3085
3086 TString className = TString("Read") + GetMethodName();
3087
3089 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
3090 << "Creating standalone class: "
3091 << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
3092
3093 std::ofstream fout( classFileName );
3094 if (!fout.good()) { // file could not be opened --> Error
3095 Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
3096 }
3097
3098 // now create the class
3099 // preamble
3100 fout << "// Class: " << className << std::endl;
3101 fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
3102
3103 // print general information and configuration state
3104 fout << std::endl;
3105 fout << "/* configuration options =====================================================" << std::endl << std::endl;
3106 WriteStateToStream( fout );
3107 fout << std::endl;
3108 fout << "============================================================================ */" << std::endl;
3109
3110 // generate the class
3111 fout << "" << std::endl;
3112 fout << "#include <array>" << std::endl;
3113 fout << "#include <vector>" << std::endl;
3114 fout << "#include <cmath>" << std::endl;
3115 fout << "#include <string>" << std::endl;
3116 fout << "#include <iostream>" << std::endl;
3117 fout << "" << std::endl;
3118 // now if the classifier needs to write some additional classes for its response implementation
3119 // this code goes here: (at least the header declarations need to come before the main class
3120 this->MakeClassSpecificHeader( fout, className );
3121
3122 fout << "#ifndef IClassifierReader__def" << std::endl;
3123 fout << "#define IClassifierReader__def" << std::endl;
3124 fout << std::endl;
3125 fout << "class IClassifierReader {" << std::endl;
3126 fout << std::endl;
3127 fout << " public:" << std::endl;
3128 fout << std::endl;
3129 fout << " // constructor" << std::endl;
3130 fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
3131 fout << " virtual ~IClassifierReader() {}" << std::endl;
3132 fout << std::endl;
3133 fout << " // return classifier response" << std::endl;
3134 if(GetAnalysisType() == Types::kMulticlass) {
3135 fout << " virtual std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3136 } else {
3137 fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3138 }
3139 fout << std::endl;
3140 fout << " // returns classifier status" << std::endl;
3141 fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3142 fout << std::endl;
3143 fout << " protected:" << std::endl;
3144 fout << std::endl;
3145 fout << " bool fStatusIsClean;" << std::endl;
3146 fout << "};" << std::endl;
3147 fout << std::endl;
3148 fout << "#endif" << std::endl;
3149 fout << std::endl;
3150 fout << "class " << className << " : public IClassifierReader {" << std::endl;
3151 fout << std::endl;
3152 fout << " public:" << std::endl;
3153 fout << std::endl;
3154 fout << " // constructor" << std::endl;
3155 fout << " " << className << "( std::vector<std::string>& theInputVars )" << std::endl;
3156 fout << " : IClassifierReader()," << std::endl;
3157 fout << " fClassName( \"" << className << "\" )," << std::endl;
3158 fout << " fNvars( " << GetNvar() << " )" << std::endl;
3159 fout << " {" << std::endl;
3160 fout << " // the training input variables" << std::endl;
3161 fout << " const char* inputVars[] = { ";
3162 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3163 fout << "\"" << GetOriginalVarName(ivar) << "\"";
3164 if (ivar<GetNvar()-1) fout << ", ";
3165 }
3166 fout << " };" << std::endl;
3167 fout << std::endl;
3168 fout << " // sanity checks" << std::endl;
3169 fout << " if (theInputVars.size() <= 0) {" << std::endl;
3170 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3171 fout << " fStatusIsClean = false;" << std::endl;
3172 fout << " }" << std::endl;
3173 fout << std::endl;
3174 fout << " if (theInputVars.size() != fNvars) {" << std::endl;
3175 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3176 fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3177 fout << " fStatusIsClean = false;" << std::endl;
3178 fout << " }" << std::endl;
3179 fout << std::endl;
3180 fout << " // validate input variables" << std::endl;
3181 fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3182 fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3183 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3184 fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3185 fout << " fStatusIsClean = false;" << std::endl;
3186 fout << " }" << std::endl;
3187 fout << " }" << std::endl;
3188 fout << std::endl;
3189 fout << " // initialize min and max vectors (for normalisation)" << std::endl;
3190 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3191 fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
3192 fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
3193 }
3194 fout << std::endl;
3195 fout << " // initialize input variable types" << std::endl;
3196 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3197 fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3198 }
3199 fout << std::endl;
3200 fout << " // initialize constants" << std::endl;
3201 fout << " Initialize();" << std::endl;
3202 fout << std::endl;
3203 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3204 fout << " // initialize transformation" << std::endl;
3205 fout << " InitTransform();" << std::endl;
3206 }
3207 fout << " }" << std::endl;
3208 fout << std::endl;
3209 fout << " // destructor" << std::endl;
3210 fout << " virtual ~" << className << "() {" << std::endl;
3211 fout << " Clear(); // method-specific" << std::endl;
3212 fout << " }" << std::endl;
3213 fout << std::endl;
3214 fout << " // the classifier response" << std::endl;
3215 fout << " // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
3216 fout << " // variables given to the constructor" << std::endl;
3217 if(GetAnalysisType() == Types::kMulticlass) {
3218 fout << " std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const override;" << std::endl;
3219 } else {
3220 fout << " double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
3221 }
3222 fout << std::endl;
3223 fout << " private:" << std::endl;
3224 fout << std::endl;
3225 fout << " // method-specific destructor" << std::endl;
3226 fout << " void Clear();" << std::endl;
3227 fout << std::endl;
3228 if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3229 fout << " // input variable transformation" << std::endl;
3230 GetTransformationHandler().MakeFunction(fout, className,1);
3231 fout << " void InitTransform();" << std::endl;
3232 fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3233 fout << std::endl;
3234 }
3235 fout << " // common member variables" << std::endl;
3236 fout << " const char* fClassName;" << std::endl;
3237 fout << std::endl;
3238 fout << " const size_t fNvars;" << std::endl;
3239 fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
3240 fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3241 fout << std::endl;
3242 fout << " // normalisation of input variables" << std::endl;
3243 fout << " double fVmin[" << GetNvar() << "];" << std::endl;
3244 fout << " double fVmax[" << GetNvar() << "];" << std::endl;
3245 fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3246 fout << " // normalise to output range: [-1, 1]" << std::endl;
3247 fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3248 fout << " }" << std::endl;
3249 fout << std::endl;
3250 fout << " // type of input variable: 'F' or 'I'" << std::endl;
3251 fout << " char fType[" << GetNvar() << "];" << std::endl;
3252 fout << std::endl;
3253 fout << " // initialize internal variables" << std::endl;
3254 fout << " void Initialize();" << std::endl;
3255 if(GetAnalysisType() == Types::kMulticlass) {
3256 fout << " std::vector<double> GetMulticlassValues__( const std::vector<double>& inputValues ) const;" << std::endl;
3257 } else {
3258 fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3259 }
3260 fout << "" << std::endl;
3261 fout << " // private members (method specific)" << std::endl;
3262
3263 // call the classifier specific output (the classifier must close the class !)
3264 MakeClassSpecific( fout, className );
3265
3266 if(GetAnalysisType() == Types::kMulticlass) {
3267 fout << "inline std::vector<double> " << className << "::GetMulticlassValues( const std::vector<double>& inputValues ) const" << std::endl;
3268 } else {
3269 fout << "inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3270 }
3271 fout << "{" << std::endl;
3272 fout << " // classifier response value" << std::endl;
3273 if(GetAnalysisType() == Types::kMulticlass) {
3274 fout << " std::vector<double> retval;" << std::endl;
3275 } else {
3276 fout << " double retval = 0;" << std::endl;
3277 }
3278 fout << std::endl;
3279 fout << " // classifier response, sanity check first" << std::endl;
3280 fout << " if (!IsStatusClean()) {" << std::endl;
3281 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3282 fout << " << \" because status is dirty\" << std::endl;" << std::endl;
3283 fout << " }" << std::endl;
3284 fout << " else {" << std::endl;
3285 if (IsNormalised()) {
3286 fout << " // normalise variables" << std::endl;
3287 fout << " std::vector<double> iV;" << std::endl;
3288 fout << " iV.reserve(inputValues.size());" << std::endl;
3289 fout << " int ivar = 0;" << std::endl;
3290 fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3291 fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3292 fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3293 fout << " }" << std::endl;
3294 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3295 GetMethodType() != Types::kHMatrix) {
3296 fout << " Transform( iV, -1 );" << std::endl;
3297 }
3298
3299 if(GetAnalysisType() == Types::kMulticlass) {
3300 fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3301 } else {
3302 fout << " retval = GetMvaValue__( iV );" << std::endl;
3303 }
3304 } else {
3305 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3306 GetMethodType() != Types::kHMatrix) {
3307 fout << " std::vector<double> iV(inputValues);" << std::endl;
3308 fout << " Transform( iV, -1 );" << std::endl;
3309 if(GetAnalysisType() == Types::kMulticlass) {
3310 fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3311 } else {
3312 fout << " retval = GetMvaValue__( iV );" << std::endl;
3313 }
3314 } else {
3315 if(GetAnalysisType() == Types::kMulticlass) {
3316 fout << " retval = GetMulticlassValues__( inputValues );" << std::endl;
3317 } else {
3318 fout << " retval = GetMvaValue__( inputValues );" << std::endl;
3319 }
3320 }
3321 }
3322 fout << " }" << std::endl;
3323 fout << std::endl;
3324 fout << " return retval;" << std::endl;
3325 fout << "}" << std::endl;
3326
3327 // create output for transformation - if any
3328 if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3329 GetTransformationHandler().MakeFunction(fout, className,2);
3330
3331 // close the file
3332 fout.close();
3333}
3334
3335////////////////////////////////////////////////////////////////////////////////
3336/// Prints the method-specific help message (obtained via GetHelpMessage()).
///
/// If the global config requests an options-reference file
/// (gConfig().WriteOptionsReference()), std::cout is temporarily redirected
/// so that the help text is appended to that file; otherwise a decorated
/// banner is printed through the logger.
/// NOTE(review): the function signature line is not visible in this extract
/// (dropped by the documentation generator); presumably
/// `void TMVA::MethodBase::PrintHelpMessage() const` — confirm against header.
3337
3339{
3340 // if options are written to reference file, also append help info
3341 std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3342 std::ofstream* o = 0; // non-null only while output is redirected to the reference file
3343 if (gConfig().WriteOptionsReference()) {
3344 Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3345 o = new std::ofstream( GetReferenceFile(), std::ios::app );
3346 if (!o->good()) { // file could not be opened --> Error
3347 Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3348 }
3349 std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3350 }
3351
3352 // "|--------------------------------------------------------------|"
    // When printing to the log (no redirection), frame the message with a
    // bold banner; when writing to the reference file, use a plain header.
3353 if (!o) {
3354 Log() << kINFO << Endl;
3355 Log() << gTools().Color("bold")
3356 << "================================================================"
3357 << gTools().Color( "reset" )
3358 << Endl;
3359 Log() << gTools().Color("bold")
3360 << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3361 << gTools().Color( "reset" )
3362 << Endl;
3363 }
3364 else {
3365 Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3366 }
3367
3368 // print method-specific help message
3369 GetHelpMessage();
3370
3371 if (!o) {
3372 Log() << Endl;
3373 Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3374 Log() << gTools().Color("bold")
3375 << "================================================================"
3376 << gTools().Color( "reset" )
3377 << Endl;
3378 Log() << Endl;
3379 }
3380 else {
3381 // indicate END
3382 Log() << "# End of Message___" << Endl;
3383 }
3384
3385 std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
    // NOTE(review): 'o' is closed but never deleted — a small one-shot leak
    // of the ofstream object (the kFATAL log above normally aborts anyway).
3386 if (o) o->close();
3387}
3388
3389// ----------------------- r o o t f i n d i n g ----------------------------
3390
3391////////////////////////////////////////////////////////////////////////////////
3392/// Returns the signal efficiency as a function of the MVA cut value `theCut`,
/// for use by the root-finding machinery (see RootFinder).
/// NOTE(review): the signature line and the condition guarding the spline
/// branch were dropped by the documentation generator in this extract;
/// presumably the spline evaluation is used when spline interpolation of the
/// efficiency curve is enabled (fSplRefS available) — confirm against the
/// full source.
3393
3395{
3396 Double_t retval=0;
3397
3398 // Evaluate the efficiency either from the smoothed spline (fSplRefS) or
    // directly from the binned efficiency histogram (fEffS).
3400 retval = fSplRefS->Eval( theCut );
3401 }
3402 else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3403
3404 // caution: here we take some "forbidden" action to hide a problem:
3405 // in some cases, in particular for likelihood, the binned efficiency distributions
3406 // do not equal 1 at xmin and 0 at xmax; of course, in principle we have the
3407 // unbinned information available in the trees, but the unbinned minimization is
3408 // too slow, and we don't need to do a precision measurement here. Hence, we force
3409 // this property.
3410 Double_t eps = 1.0e-5;
    // Clamp the endpoints: efficiency is pinned to 1 (resp. 0) at the domain
    // boundaries, with the orientation of the cut deciding which end gets which.
3411 if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3412 else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3413
3414 return retval;
3415}
3416
3417////////////////////////////////////////////////////////////////////////////////
3418/// Returns the event collection (i.e. the dataset) TRANSFORMED using the
3419/// classifier's specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr).
///
/// The transformed collection is computed at most once per tree type and then
/// cached in fEventCollections, so repeated calls are cheap.
/// NOTE(review): the signature line is not visible in this extract; presumably
/// `const std::vector<TMVA::Event*>& TMVA::MethodBase::GetEventCollection(Types::ETreeType type)`
/// — confirm against header.
3420
3422{
3423 // if there's no variable transformation for this classifier, just hand back the
3424 // event collection of the data set
3425 if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3426 return (Data()->GetEventCollection(type));
3427 }
3428
3429 // otherwise, transform ALL the events and hand back the vector of the pointers to the
3430 // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3431 // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3432 Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3433 if (fEventCollections.at(idx) == 0) {
    // First request for this tree type: transform the raw collection and
    // cache the result (kTRUE presumably requests creation of event copies).
3434 fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3435 fEventCollections.at(idx) = GetTransformationHandler().CalcTransformations(*(fEventCollections.at(idx)),kTRUE);
3436 }
3437 return *(fEventCollections.at(idx));
3438}
3439
3440////////////////////////////////////////////////////////////////////////////////
3441/// Calculates the TMVA version string from the training version code on the fly.
///
/// The version code packs major/minor/patch into one word as 0xMMmmpp:
/// byte 2 = major, byte 1 = minor, byte 0 = patch; the result is formatted
/// as "major.minor.patch".
/// NOTE(review): the signature line is not visible in this extract;
/// presumably `TString TMVA::MethodBase::GetTrainingTMVAVersionString() const`.
3442
3444{
3445 UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16; // major
3446 UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8; // minor
3447 UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff; // patch
3448
3449 return TString::Format("%i.%i.%i",a,b,c);
3450}
3451
3452////////////////////////////////////////////////////////////////////////////////
3453/// Calculates the ROOT version string from the training version code on the fly.
///
/// Same 0xMMmmpp packing as the TMVA version code, but rendered in ROOT's
/// conventional "major.minor/patch" form with zero-padded minor and patch.
/// NOTE(review): the signature line is not visible in this extract;
/// presumably `TString TMVA::MethodBase::GetTrainingROOTVersionString() const`.
3454
3456{
3457 UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16; // major
3458 UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8; // minor
3459 UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff; // patch
3460
3461 return TString::Format("%i.%02i/%02i",a,b,c);
3462}
3463
3464////////////////////////////////////////////////////////////////////////////////
3465/// Kolmogorov-Smirnov compatibility test between the MVA output distributions
/// of the test sample and the training sample, for signal ('s'/'S') or
/// background (any other value of `SorB`); `opt` is forwarded to
/// TH1::KolmogorovTest. Returns -1 when the classification results or any of
/// the four required histograms are unavailable.
/// NOTE(review): the signature and the line obtaining `mvaRes` (presumably a
/// dynamic_cast of the kTesting classification Results) are not visible in
/// this extract — confirm against the full source.
3468 ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
3469
3470 if (mvaRes != NULL) {
3471 TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3472 TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3473 TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3474 TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3475
3476 if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3477
3478 if (SorB == 's' || SorB == 'S')
3479 return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3480 else
3481 return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3482 }
3483 return -1;
3484}
const Bool_t Use_Splines_for_Eff_
const Int_t NBIN_HIST_HIGH
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
#define s1(x)
Definition RSha256.hxx:91
#define ROOT_VERSION_CODE
Definition RVersion.hxx:24
bool Bool_t
Boolean (0=false, 1=true) (bool)
Definition RtypesCore.h:77
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
char Char_t
Character 1 byte (char)
Definition RtypesCore.h:51
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char y2
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char y1
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
TMatrixT< Double_t > TMatrixD
Definition TMatrixDfwd.h:23
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2495
R__EXTERN TSystem * gSystem
Definition TSystem.h:582
#define TMVA_VERSION_CODE
Definition Version.h:47
const_iterator begin() const
const_iterator end() const
Class to manage histogram axis.
Definition TAxis.h:32
Double_t GetXmax() const
Definition TAxis.h:142
Double_t GetXmin() const
Definition TAxis.h:141
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130...
Definition TDatime.h:37
Describe directory structure in memory.
Definition TDirectory.h:45
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:130
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3765
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:926
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:878
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition TH1.cxx:7627
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition TH1.cxx:1289
virtual Int_t GetQuantiles(Int_t n, Double_t *xp, const Double_t *p=nullptr)
Compute Quantiles for this histogram.
Definition TH1.cxx:4649
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition TH1.cxx:747
2-D histogram with a float per channel (see TH1 documentation)
Definition TH2.h:345
Int_t Fill(Double_t) override
Invalid Fill method.
Definition TH2.cxx:363
A doubly linked list.
Definition TList.h:38
Class that contains all the information of a class.
Definition ClassInfo.h:49
TString fWeightFileExtension
Definition Config.h:125
VariablePlotting & GetVariablePlotting()
Definition Config.h:97
class TMVA::Config::VariablePlotting fVariablePlotting
IONames & GetIONames()
Definition Config.h:98
MsgLogger * fLogger
! message logger
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
Definition DataSet.h:58
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:399
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:408
Interface for all concrete MVA method implementations.
Definition IMethod.h:53
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
IPythonInteractive()
standard constructor
~IPythonInteractive()
standard destructor
void ClearGraphs()
This function sets the point number to 0 for all graphs.
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Virtual base Class for all MVA method.
Definition MethodBase.h:111
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
void PrintHelpMessage() const override
prints out method-specific help method
virtual std::vector< Float_t > GetAllMulticlassValues()
Get all multi-class values.
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
const char * GetName() const override
Definition MethodBase.h:337
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
void SetWeightFileDir(TString fileDir)
set directory of weight file
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables,...
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual Double_t GetSignificance() const
compute significance of mean difference
virtual Double_t GetProba(const Event *ev)
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual std::vector< Double_t > GetDataMvaValues(DataSet *data=nullptr, Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the given Data type
void SetupMethod()
setup of methods
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
virtual ~MethodBase()
destructor
void WriteMonitoringHistosToFile() const override
write special monitoring histograms to file dummy implementation here --------------—
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance, , curve for given number of signal and background events; returns cut for maximum ...
virtual Double_t GetTrainingEfficiency(const TString &)
void SetWeightFileName(TString)
set the weight file name (deprecated)
TString GetWeightFileName() const
retrieve weight file name
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void InitBase()
default initialization called by all constructors
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
void MakeClass(const TString &classFileName=TString("")) const override
create reader class for method (classification only at present)
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
void ReadStateFromFile()
Function to write options and weights to file.
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream.
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
void SetTestvarName(const TString &v="")
Definition MethodBase.h:344
void ReadVariablesFromXML(void *varnode)
read variable info from XML
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables,...
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity:
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument This is just a dummy .
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
void AddVarsXMLTo(void *parent) const
write variable info to XML
Double_t GetMvaValue(Double_t *errLower=nullptr, Double_t *errUpper=nullptr) override=0
void AddTargetsXMLTo(void *parent) const
write target info to XML
void ReadTargetsFromXML(void *tarnode)
read target info from XML
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
void ReadStateFromXML(void *parent)
virtual std::vector< Float_t > GetAllRegressionValues()
Get all regression values in one call.
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
void SetSource(const std::string &source)
Definition MsgLogger.h:68
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
@ kSpline3
Definition PDF.h:70
@ kSpline2
Definition PDF.h:70
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Definition Results.h:57
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition RootFinder.h:48
Linear interpolation of TGraph.
Definition TSpline1.h:43
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition Tools.cxx:203
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition Tools.cxx:376
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as
Definition Tools.cxx:122
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition Tools.cxx:564
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:803
TXMLEngine & xmlengine()
Definition Tools.h:262
Bool_t CheckSplines(const TH1 *, const TSpline *)
check quality of splining by comparing splines and histograms in each bin
Definition Tools.cxx:454
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1125
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition Tools.cxx:358
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1099
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1137
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Definition Types.h:135
@ kBackground
Definition Types.h:136
@ kLikelihood
Definition Types.h:79
@ kHMatrix
Definition Types.h:81
@ kMulticlass
Definition Types.h:129
@ kNoAnalysisType
Definition Types.h:130
@ kClassification
Definition Types.h:127
@ kMaxAnalysisType
Definition Types.h:131
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
Linear interpolation class.
Gaussian Transformation of input variables.
Class for type info of MVA input variable.
Linear interpolation class.
Linear interpolation class.
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition TMultiGraph.h:34
Collectable string class.
Definition TObjString.h:28
Basic string class.
Definition TString.h:138
void ToLower()
Change string to lower-case.
Definition TString.cxx:1189
Int_t Atoi() const
Return integer value of string.
Definition TString.cxx:1994
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition TString.cxx:1170
const char * Data() const
Definition TString.h:384
@ kLeading
Definition TString.h:284
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:660
virtual const char * GetBuildNode() const
Return the build node name.
Definition TSystem.cxx:3943
virtual int mkdir(const char *name, Bool_t recursive=kFALSE)
Make a file system directory.
Definition TSystem.cxx:916
virtual const char * WorkingDirectory()
Return working directory.
Definition TSystem.cxx:881
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition TSystem.cxx:1612
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file if layout<=0, no any spaces or newlines will be placed between xmlnode...
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
TLine * line
Double_t x[n]
Definition legend1.C:17
TH1F * h1
Definition legend1.C:5
Config & gConfig()
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:249
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:673
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:197
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:122
static void output()