Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodBase.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodBase *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21 * *
22 * Copyright (c) 2005-2011: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * U. of Bonn, Germany *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (http://tmva.sourceforge.net/LICENSE) *
31 * *
32 **********************************************************************************/
33
34/*! \class TMVA::MethodBase
35\ingroup TMVA
36
 37 Virtual base class for all MVA methods
38
39 MethodBase hosts several specific evaluation methods.
40
41 The kind of MVA that provides optimal performance in an analysis strongly
42 depends on the particular application. The evaluation factory provides a
43 number of numerical benchmark results to directly assess the performance
44 of the MVA training on the independent test sample. These are:
45
46 - The _signal efficiency_ at three representative background efficiencies
47 (which is 1 &minus; rejection).
48 - The _significance_ of an MVA estimator, defined by the difference
49 between the MVA mean values for signal and background, divided by the
50 quadratic sum of their root mean squares.
51 - The _separation_ of an MVA _x_, defined by the integral
52 \f[
53 \frac{1}{2} \int \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx
54 \f]
55 where
56 \f$ S(x) \f$ and \f$ B(x) \f$ are the signal and background distributions,
57 respectively. The separation is zero for identical signal and background MVA
58 shapes, and it is one for disjunctive shapes.
59 - The average, \f$ \int x \mu (S(x)) dx \f$, of the signal \f$ \mu_{transform} \f$.
60 The \f$ \mu_{transform} \f$ of an MVA denotes the transformation that yields
61 a uniform background distribution. In this way, the signal distributions
62 \f$ S(x) \f$ can be directly compared among the various MVAs. The stronger
63 \f$ S(x) \f$ peaks towards one, the better is the discrimination of the MVA.
64 The \f$ \mu_{transform} \f$ is
65 [documented here](http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html).
66
67 The MVA standard output also prints the linear correlation coefficients between
68 signal and background, which can be useful to eliminate variables that exhibit too
69 strong correlations.
70*/
71
72#include "TMVA/MethodBase.h"
73
74#include "TMVA/Config.h"
75#include "TMVA/Configurable.h"
76#include "TMVA/DataSetInfo.h"
77#include "TMVA/DataSet.h"
78#include "TMVA/Factory.h"
79#include "TMVA/IMethod.h"
80#include "TMVA/MsgLogger.h"
81#include "TMVA/PDF.h"
82#include "TMVA/Ranking.h"
83#include "TMVA/DataLoader.h"
84#include "TMVA/Tools.h"
85#include "TMVA/Results.h"
89#include "TMVA/RootFinder.h"
90#include "TMVA/Timer.h"
91#include "TMVA/TSpline1.h"
92#include "TMVA/Types.h"
96#include "TMVA/VariableInfo.h"
100#include "TMVA/Version.h"
101
102#include "TROOT.h"
103#include "TSystem.h"
104#include "TObjString.h"
105#include "TQObject.h"
106#include "TSpline.h"
107#include "TMatrix.h"
108#include "TMath.h"
109#include "TH1F.h"
110#include "TH2F.h"
111#include "TFile.h"
112#include "TGraph.h"
113#include "TXMLEngine.h"
114
115#include <iomanip>
116#include <iostream>
117#include <fstream>
118#include <sstream>
119#include <cstdlib>
120#include <algorithm>
121#include <limits>
122
123
125
126using std::endl;
127using std::atof;
128
129//const Int_t MethodBase_MaxIterations_ = 200;
131
132//const Int_t NBIN_HIST_PLOT = 100;
133const Int_t NBIN_HIST_HIGH = 10000;
134
135#ifdef _WIN32
136/* Disable warning C4355: 'this' : used in base member initializer list */
137#pragma warning ( disable : 4355 )
138#endif
139
140
141#include "TMultiGraph.h"
142
143////////////////////////////////////////////////////////////////////////////////
144/// standard constructor
145
147{
148 fNumGraphs = 0;
149 fIndex = 0;
150}
151
152////////////////////////////////////////////////////////////////////////////////
153/// standard destructor
155{
156 if (fMultiGraph){
157 delete fMultiGraph;
158 fMultiGraph = nullptr;
159 }
160 return;
161}
162
163////////////////////////////////////////////////////////////////////////////////
164/// This function gets some title and it creates a TGraph for every title.
165/// It also sets up the style for every TGraph. All graphs are added to a single TMultiGraph.
166///
167/// \param[in] graphTitles vector of titles
168
169void TMVA::IPythonInteractive::Init(std::vector<TString>& graphTitles)
170{
171 if (fNumGraphs!=0){
172 std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
173 return;
174 }
175 Int_t color = 2;
176 for(auto& title : graphTitles){
177 fGraphs.push_back( new TGraph() );
178 fGraphs.back()->SetTitle(title);
179 fGraphs.back()->SetName(title);
180 fGraphs.back()->SetFillColor(color);
181 fGraphs.back()->SetLineColor(color);
182 fGraphs.back()->SetMarkerColor(color);
183 fMultiGraph->Add(fGraphs.back());
184 color += 2;
185 fNumGraphs += 1;
186 }
187 return;
188}
189
190////////////////////////////////////////////////////////////////////////////////
191/// This function sets the point number to 0 for all graphs.
192
194{
195 for(Int_t i=0; i<fNumGraphs; i++){
196 fGraphs[i]->Set(0);
197 }
198}
199
200////////////////////////////////////////////////////////////////////////////////
201/// This function is used only in 2 TGraph case, and it will add new data points to graphs.
202///
203/// \param[in] x the x coordinate
204/// \param[in] y1 the y coordinate for the first TGraph
205/// \param[in] y2 the y coordinate for the second TGraph
206
208{
209 fGraphs[0]->Set(fIndex+1);
210 fGraphs[1]->Set(fIndex+1);
211 fGraphs[0]->SetPoint(fIndex, x, y1);
212 fGraphs[1]->SetPoint(fIndex, x, y2);
213 fIndex++;
214 return;
215}
216
217////////////////////////////////////////////////////////////////////////////////
218/// This function can add data points to as many TGraphs as we have.
219///
220/// \param[in] dat vector of data points. The dat[0] contains the x coordinate,
221/// dat[1] contains the y coordinate for first TGraph, dat[2] for second, ...
222
223void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
224{
225 for(Int_t i=0; i<fNumGraphs;i++){
226 fGraphs[i]->Set(fIndex+1);
227 fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
228 }
229 fIndex++;
230 return;
231}
232
233
234////////////////////////////////////////////////////////////////////////////////
235/// standard constructor
236
238 Types::EMVA methodType,
239 const TString& methodTitle,
240 DataSetInfo& dsi,
241 const TString& theOption) :
242 IMethod(),
243 Configurable ( theOption ),
244 fTmpEvent ( 0 ),
245 fRanking ( 0 ),
246 fInputVars ( 0 ),
247 fAnalysisType ( Types::kNoAnalysisType ),
248 fRegressionReturnVal ( 0 ),
249 fMulticlassReturnVal ( 0 ),
250 fDataSetInfo ( dsi ),
251 fSignalReferenceCut ( 0.5 ),
252 fSignalReferenceCutOrientation( 1. ),
253 fVariableTransformType ( Types::kSignal ),
254 fJobName ( jobName ),
255 fMethodName ( methodTitle ),
256 fMethodType ( methodType ),
257 fTestvar ( "" ),
258 fTMVATrainingVersion ( TMVA_VERSION_CODE ),
259 fROOTTrainingVersion ( ROOT_VERSION_CODE ),
260 fConstructedFromWeightFile ( kFALSE ),
261 fBaseDir ( 0 ),
262 fMethodBaseDir ( 0 ),
263 fFile ( 0 ),
264 fSilentFile (kFALSE),
265 fModelPersistence (kTRUE),
266 fWeightFile ( "" ),
267 fEffS ( 0 ),
268 fDefaultPDF ( 0 ),
269 fMVAPdfS ( 0 ),
270 fMVAPdfB ( 0 ),
271 fSplS ( 0 ),
272 fSplB ( 0 ),
273 fSpleffBvsS ( 0 ),
274 fSplTrainS ( 0 ),
275 fSplTrainB ( 0 ),
276 fSplTrainEffBvsS ( 0 ),
277 fVarTransformString ( "None" ),
278 fTransformationPointer ( 0 ),
279 fTransformation ( dsi, methodTitle ),
280 fVerbose ( kFALSE ),
281 fVerbosityLevelString ( "Default" ),
282 fHelp ( kFALSE ),
283 fHasMVAPdfs ( kFALSE ),
284 fIgnoreNegWeightsInTraining( kFALSE ),
285 fSignalClass ( 0 ),
286 fBackgroundClass ( 0 ),
287 fSplRefS ( 0 ),
288 fSplRefB ( 0 ),
289 fSplTrainRefS ( 0 ),
290 fSplTrainRefB ( 0 ),
291 fSetupCompleted (kFALSE)
292{
295
296// // default extension for weight files
297}
298
299////////////////////////////////////////////////////////////////////////////////
300/// constructor used for Testing + Application of the MVA,
301/// only (no training), using given WeightFiles
302
304 DataSetInfo& dsi,
305 const TString& weightFile ) :
306 IMethod(),
307 Configurable(""),
308 fTmpEvent ( 0 ),
309 fRanking ( 0 ),
310 fInputVars ( 0 ),
311 fAnalysisType ( Types::kNoAnalysisType ),
312 fRegressionReturnVal ( 0 ),
313 fMulticlassReturnVal ( 0 ),
314 fDataSetInfo ( dsi ),
315 fSignalReferenceCut ( 0.5 ),
316 fVariableTransformType ( Types::kSignal ),
317 fJobName ( "" ),
318 fMethodName ( "MethodBase" ),
319 fMethodType ( methodType ),
320 fTestvar ( "" ),
321 fTMVATrainingVersion ( 0 ),
322 fROOTTrainingVersion ( 0 ),
323 fConstructedFromWeightFile ( kTRUE ),
324 fBaseDir ( 0 ),
325 fMethodBaseDir ( 0 ),
326 fFile ( 0 ),
327 fSilentFile (kFALSE),
328 fModelPersistence (kTRUE),
329 fWeightFile ( weightFile ),
330 fEffS ( 0 ),
331 fDefaultPDF ( 0 ),
332 fMVAPdfS ( 0 ),
333 fMVAPdfB ( 0 ),
334 fSplS ( 0 ),
335 fSplB ( 0 ),
336 fSpleffBvsS ( 0 ),
337 fSplTrainS ( 0 ),
338 fSplTrainB ( 0 ),
339 fSplTrainEffBvsS ( 0 ),
340 fVarTransformString ( "None" ),
341 fTransformationPointer ( 0 ),
342 fTransformation ( dsi, "" ),
343 fVerbose ( kFALSE ),
344 fVerbosityLevelString ( "Default" ),
345 fHelp ( kFALSE ),
346 fHasMVAPdfs ( kFALSE ),
347 fIgnoreNegWeightsInTraining( kFALSE ),
348 fSignalClass ( 0 ),
349 fBackgroundClass ( 0 ),
350 fSplRefS ( 0 ),
351 fSplRefB ( 0 ),
352 fSplTrainRefS ( 0 ),
353 fSplTrainRefB ( 0 ),
354 fSetupCompleted (kFALSE)
355{
357// // constructor used for Testing + Application of the MVA,
358// // only (no training), using given WeightFiles
359}
360
361////////////////////////////////////////////////////////////////////////////////
362/// destructor
363
365{
366 // destructor
367 if (!fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
368
369 // destructor
370 if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
371 if (fRanking != 0) delete fRanking;
372
373 // PDFs
374 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
375 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
376 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
377
378 // Splines
379 if (fSplS) { delete fSplS; fSplS = 0; }
380 if (fSplB) { delete fSplB; fSplB = 0; }
381 if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
382 if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
383 if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
384 if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
385 if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
386 if (fSplTrainEffBvsS) { delete fSplTrainEffBvsS; fSplTrainEffBvsS = 0; }
387
388 for (Int_t i = 0; i < 2; i++ ) {
389 if (fEventCollections.at(i)) {
390 for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
391 it != fEventCollections.at(i)->end(); ++it) {
392 delete (*it);
393 }
394 delete fEventCollections.at(i);
395 fEventCollections.at(i) = 0;
396 }
397 }
398
399 if (fRegressionReturnVal) delete fRegressionReturnVal;
400 if (fMulticlassReturnVal) delete fMulticlassReturnVal;
401}
402
403////////////////////////////////////////////////////////////////////////////////
404/// setup of methods
405
407{
408 // setup of methods
409
410 if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
411 InitBase();
412 DeclareBaseOptions();
413 Init();
414 DeclareOptions();
415 fSetupCompleted = kTRUE;
416}
417
418////////////////////////////////////////////////////////////////////////////////
419/// process all options
420/// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
421/// (sometimes, eg, fitters are used which can only be implemented during training phase)
422
424{
425 ProcessBaseOptions();
426 ProcessOptions();
427}
428
429////////////////////////////////////////////////////////////////////////////////
430/// check may be overridden by derived class
431/// (sometimes, eg, fitters are used which can only be implemented during training phase)
432
434{
435 CheckForUnusedOptions();
436}
437
438////////////////////////////////////////////////////////////////////////////////
439/// default initialization called by all constructors
440
442{
443 SetConfigDescription( "Configuration options for classifier architecture and tuning" );
444
446 fNbinsMVAoutput = gConfig().fVariablePlotting.fNbinsMVAoutput;
447 fNbinsH = NBIN_HIST_HIGH;
448
449 fSplTrainS = 0;
450 fSplTrainB = 0;
451 fSplTrainEffBvsS = 0;
452 fMeanS = -1;
453 fMeanB = -1;
454 fRmsS = -1;
455 fRmsB = -1;
456 fXmin = DBL_MAX;
457 fXmax = -DBL_MAX;
458 fTxtWeightsOnly = kTRUE;
459 fSplRefS = 0;
460 fSplRefB = 0;
461
462 fTrainTime = -1.;
463 fTestTime = -1.;
464
465 fRanking = 0;
466
467 // temporary until the move to DataSet is complete
468 fInputVars = new std::vector<TString>;
469 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
470 fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
471 }
472 fRegressionReturnVal = 0;
473 fMulticlassReturnVal = 0;
474
475 fEventCollections.resize( 2 );
476 fEventCollections.at(0) = 0;
477 fEventCollections.at(1) = 0;
478
479 // retrieve signal and background class index
480 if (DataInfo().GetClassInfo("Signal") != 0) {
481 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
482 }
483 if (DataInfo().GetClassInfo("Background") != 0) {
484 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
485 }
486
487 SetConfigDescription( "Configuration options for MVA method" );
488 SetConfigName( TString("Method") + GetMethodTypeName() );
489}
490
491////////////////////////////////////////////////////////////////////////////////
492/// define the options (their key words) that can be set in the option string
493/// here the options valid for ALL MVA methods are declared.
494///
495/// know options:
496///
497/// - VariableTransform=None,Decorrelated,PCA to use transformed variables
498/// instead of the original ones
499/// - VariableTransformType=Signal,Background which decorrelation matrix to use
500/// in the method. Only the Likelihood
501/// Method can make proper use of independent
502/// transformations of signal and background
503/// - fNbinsMVAPdf = 50 Number of bins used to create a PDF of MVA
504/// - fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
505/// - fHasMVAPdfs create PDFs for the MVA outputs
506/// - V for Verbose output (!V) for non-verbose output
507/// - H for Help message
508
510{
511 DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
512
513 DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
514 AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
515 AddPreDefVal( TString("Debug") );
516 AddPreDefVal( TString("Verbose") );
517 AddPreDefVal( TString("Info") );
518 AddPreDefVal( TString("Warning") );
519 AddPreDefVal( TString("Error") );
520 AddPreDefVal( TString("Fatal") );
521
522 // If True (default): write all training results (weights) as text files only;
523 // if False: write also in ROOT format (not available for all methods - will abort if not
524 fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
525 fNormalise = kFALSE; // OBSOLETE !!!
526
527 DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
528
529 DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
530
531 DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
532
533 DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
534 "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
535}
536
537////////////////////////////////////////////////////////////////////////////////
538/// the option string is decoded, for available options see "DeclareOptions"
539
541{
542 if (HasMVAPdfs()) {
543 // setting the default bin num... maybe should be static ? ==> Please no static (JS)
544 // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
545 // reading every PDF's definition and passing the option string to the next one to be read and marked
546 fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
547 fDefaultPDF->DeclareOptions();
548 fDefaultPDF->ParseOptions();
549 fDefaultPDF->ProcessOptions();
550 fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
551 fMVAPdfB->DeclareOptions();
552 fMVAPdfB->ParseOptions();
553 fMVAPdfB->ProcessOptions();
554 fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
555 fMVAPdfS->DeclareOptions();
556 fMVAPdfS->ParseOptions();
557 fMVAPdfS->ProcessOptions();
558
559 // the final marked option string is written back to the original methodbase
560 SetOptions( fMVAPdfS->GetOptions() );
561 }
562
563 TMVA::CreateVariableTransforms( fVarTransformString,
564 DataInfo(),
565 GetTransformationHandler(),
566 Log() );
567
568 if (!HasMVAPdfs()) {
569 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
570 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
571 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
572 }
573
574 if (fVerbose) { // overwrites other settings
575 fVerbosityLevelString = TString("Verbose");
576 Log().SetMinType( kVERBOSE );
577 }
578 else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
579 else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
580 else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
581 else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
582 else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
583 else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
584 else if (fVerbosityLevelString != "Default" ) {
585 Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
586 << fVerbosityLevelString << "' unknown." << Endl;
587 }
588 Event::SetIgnoreNegWeightsInTraining(fIgnoreNegWeightsInTraining);
589}
590
591////////////////////////////////////////////////////////////////////////////////
592/// options that are used ONLY for the READER to ensure backward compatibility
593/// they are hence without any effect (the reader is only reading the training
594/// options that HAD been used at the training of the .xml weight file at hand
595
597{
598 DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
599 DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
600 DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
601 "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
602 AddPreDefVal( TString("Signal") );
603 AddPreDefVal( TString("Background") );
604 DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
605 // Why on earth ?? was this here? Was the verbosity level option meant to 'disappear? Not a good idea i think..
606 // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
607 // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
608 // AddPreDefVal( TString("Debug") );
609 // AddPreDefVal( TString("Verbose") );
610 // AddPreDefVal( TString("Info") );
611 // AddPreDefVal( TString("Warning") );
612 // AddPreDefVal( TString("Error") );
613 // AddPreDefVal( TString("Fatal") );
614 DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
615 DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
616}
617
618
619////////////////////////////////////////////////////////////////////////////////
620/// call the Optimizer with the set of parameters and ranges that
621/// are meant to be tuned.
622
623std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
624{
625 // this is just a dummy... needs to be implemented for each method
626 // individually (as long as we don't have it automatized via the
627 // configuration string
628
629 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
630 << GetName() << Endl;
631 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently we need to set hardcoded which parameter is tuned in which ranges"<<Endl;
632
633 std::map<TString,Double_t> tunedParameters;
634 tunedParameters.size(); // just to get rid of "unused" warning
635 return tunedParameters;
636
637}
638
639////////////////////////////////////////////////////////////////////////////////
640/// set the tuning parameters according to the argument
641/// This is just a dummy .. have a look at the MethodBDT how you could
642/// perhaps implement the same thing for the other Classifiers..
643
644void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
645{
646}
647
648////////////////////////////////////////////////////////////////////////////////
649
651{
652 Data()->SetCurrentType(Types::kTraining);
653 Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
654
655 // train the MVA method
656 if (Help()) PrintHelpMessage();
657
658 // all histograms should be created in the method's subdirectory
659 if(!IsSilentFile()) BaseDir()->cd();
660
661 // once calculate all the transformation (e.g. the sequence of Decorr:Gauss:Decorr)
662 // needed for this classifier
663 GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());
664
665 // call training of derived MVA
666 Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
667 << "Begin training" << Endl;
668 Long64_t nEvents = Data()->GetNEvents();
669 Timer traintimer( nEvents, GetName(), kTRUE );
670 Train();
671 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
672 << "\tEnd of training " << Endl;
673 SetTrainTime(traintimer.ElapsedSeconds());
674 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
675 << "Elapsed time for training with " << nEvents << " events: "
676 << traintimer.GetElapsedTime() << " " << Endl;
677
678 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
679 << "\tCreate MVA output for ";
680
681 // create PDFs for the signal and background MVA distributions (if required)
682 if (DoMulticlass()) {
683 Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
684 AddMulticlassOutput(Types::kTraining);
685 }
686 else if (!DoRegression()) {
687
688 Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
689 AddClassifierOutput(Types::kTraining);
690 if (HasMVAPdfs()) {
691 CreateMVAPdfs();
692 AddClassifierOutputProb(Types::kTraining);
693 }
694
695 } else {
696
697 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
698 AddRegressionOutput( Types::kTraining );
699
700 if (HasMVAPdfs() ) {
701 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
702 CreateMVAPdfs();
703 }
704 }
705
706 // write the current MVA state into stream
707 // produced are one text file and one ROOT file
708 if (fModelPersistence ) WriteStateToFile();
709
710 // produce standalone make class (presently only supported for classification)
711 if ((!DoRegression()) && (fModelPersistence)) MakeClass();
712
713 // write additional monitoring histograms to main target file (not the weight file)
714 // again, make sure the histograms go into the method's subdirectory
715 if(!IsSilentFile())
716 {
717 BaseDir()->cd();
718 WriteMonitoringHistosToFile();
719 }
720}
721
722////////////////////////////////////////////////////////////////////////////////
723
725{
726 if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
727 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
728 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), Types::kTesting, Types::kRegression);
729 bool truncate = false;
730 TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
731 stddev = sqrt(h1->GetMean());
732 truncate = true;
733 Double_t yq[1], xq[]={0.9};
734 h1->GetQuantiles(1,yq,xq);
735 TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
736 stddev90Percent = sqrt(h2->GetMean());
737 delete h1;
738 delete h2;
739}
740
741////////////////////////////////////////////////////////////////////////////////
742/// prepare tree branch with the method's discriminating variable
743
745{
746 Data()->SetCurrentType(type);
747
748 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
749
750 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), type, Types::kRegression);
751
752 Long64_t nEvents = Data()->GetNEvents();
753
754 // use timer
755 Timer timer( nEvents, GetName(), kTRUE );
756 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
757 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
758
759 regRes->Resize( nEvents );
760
761 // Drawing the progress bar every event was causing a huge slowdown in the evaluation time
762 // So we set some parameters to draw the progress bar a total of totalProgressDraws, i.e. only draw every 1 in 100
763
764 Int_t totalProgressDraws = 100; // total number of times to update the progress bar
765 Int_t drawProgressEvery = 1; // draw every nth event such that we have a total of totalProgressDraws
766 if(nEvents >= totalProgressDraws) drawProgressEvery = nEvents/totalProgressDraws;
767
768 for (Int_t ievt=0; ievt<nEvents; ievt++) {
769
770 Data()->SetCurrentEvent(ievt);
771 std::vector< Float_t > vals = GetRegressionValues();
772 regRes->SetValue( vals, ievt );
773
774 // Only draw the progress bar once in a while, doing this every event causes the evaluation to be ridiculously slow
775 if(ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.DrawProgressBar( ievt );
776 }
777
778 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
779 << "Elapsed time for evaluation of " << nEvents << " events: "
780 << timer.GetElapsedTime() << " " << Endl;
781
782 // store time used for testing
784 SetTestTime(timer.ElapsedSeconds());
785
786 TString histNamePrefix(GetTestvarName());
787 histNamePrefix += (type==Types::kTraining?"train":"test");
788 regRes->CreateDeviationHistograms( histNamePrefix );
789}
790
791////////////////////////////////////////////////////////////////////////////////
792/// prepare tree branch with the method's discriminating variable
793
795{
796 Data()->SetCurrentType(type);
797
798 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
799
800 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
801 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
802
803 Long64_t nEvents = Data()->GetNEvents();
804
805 // use timer
806 Timer timer( nEvents, GetName(), kTRUE );
807
808 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
809 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
810
811 resMulticlass->Resize( nEvents );
812 for (Int_t ievt=0; ievt<nEvents; ievt++) {
813 Data()->SetCurrentEvent(ievt);
814 std::vector< Float_t > vals = GetMulticlassValues();
815 resMulticlass->SetValue( vals, ievt );
816 timer.DrawProgressBar( ievt );
817 }
818
819 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
820 << "Elapsed time for evaluation of " << nEvents << " events: "
821 << timer.GetElapsedTime() << " " << Endl;
822
823 // store time used for testing
825 SetTestTime(timer.ElapsedSeconds());
826
827 TString histNamePrefix(GetTestvarName());
828 histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
829
830 resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
831 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
832}
833
834////////////////////////////////////////////////////////////////////////////////
835
836void TMVA::MethodBase::NoErrorCalc(Double_t* const err, Double_t* const errUpper) {
837 if (err) *err=-1;
838 if (errUpper) *errUpper=-1;
839}
840
841////////////////////////////////////////////////////////////////////////////////
842
843Double_t TMVA::MethodBase::GetMvaValue( const Event* const ev, Double_t* err, Double_t* errUpper ) {
844 fTmpEvent = ev;
845 Double_t val = GetMvaValue(err, errUpper);
846 fTmpEvent = 0;
847 return val;
848}
849
850////////////////////////////////////////////////////////////////////////////////
851/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
852/// for a quick determination if an event would be selected as signal or background
853
855 return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
856}
857////////////////////////////////////////////////////////////////////////////////
858/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
859/// for a quick determination if an event with this mva output value would be selected as signal or background
860
862 return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
863}
864
865////////////////////////////////////////////////////////////////////////////////
/// Prepare tree branch with the method's discriminating variable: evaluate
/// the classifier for every event of the given tree type and store each MVA
/// value together with the event's true class flag in the results object.

{
   Data()->SetCurrentType(type);

   // results container for this method and tree type
   ResultsClassification* clRes =
      (ResultsClassification*)Data()->GetResults(GetMethodName(), type, Types::kClassification );

   Long64_t nEvents = Data()->GetNEvents();
   clRes->Resize( nEvents );

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );
   // evaluate all events in one go (GetMvaValues handles its own progress log)
   std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);

   // store time used for testing
   SetTestTime(timer.ElapsedSeconds());

   // load mva values and type to results object
   for (Int_t ievt = 0; ievt < nEvents; ievt++) {
      // note we do not need the transformed event to get the signal/background information
      // by calling Data()->GetEvent instead of this->GetEvent we access the untransformed one
      auto ev = Data()->GetEvent(ievt);
      clRes->SetValue(mvaValues[ievt], ievt, DataInfo().IsSignal(ev));
   }
}
894
895////////////////////////////////////////////////////////////////////////////////
896/// get all the MVA values for the events of the current Data type
897std::vector<Double_t> TMVA::MethodBase::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
898{
899
900 Long64_t nEvents = Data()->GetNEvents();
901 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
902 if (firstEvt < 0) firstEvt = 0;
903 std::vector<Double_t> values(lastEvt-firstEvt);
904 // log in case of looping on all the events
905 nEvents = values.size();
906
907 // use timer
908 Timer timer( nEvents, GetName(), kTRUE );
909
910 if (logProgress)
911 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
912 << "Evaluation of " << GetMethodName() << " on "
913 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
914 << " sample (" << nEvents << " events)" << Endl;
915
916 for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
917 Data()->SetCurrentEvent(ievt);
918 values[ievt] = GetMvaValue();
919
920 // print progress
921 if (logProgress) {
922 Int_t modulo = Int_t(nEvents/100);
923 if (modulo <= 0 ) modulo = 1;
924 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
925 }
926 }
927 if (logProgress) {
928 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
929 << "Elapsed time for evaluation of " << nEvents << " events: "
930 << timer.GetElapsedTime() << " " << Endl;
931 }
932
933 return values;
934}
935
936////////////////////////////////////////////////////////////////////////////////
/// Prepare tree branch with the method's signal probability: evaluate
/// P(signal|MVA) via GetProba() for every event of the given tree type and
/// store it in the dedicated "prob_<method>" results object.

{
   Data()->SetCurrentType(type);

   ResultsClassification* mvaProb =
      (ResultsClassification*)Data()->GetResults(TString("prob_")+GetMethodName(), type, Types::kClassification );

   Long64_t nEvents = Data()->GetNEvents();

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );

   Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
         << (type==Types::kTraining?"training":"testing") << " sample" << Endl;

   mvaProb->Resize( nEvents );
   for (Int_t ievt=0; ievt<nEvents; ievt++) {

      Data()->SetCurrentEvent(ievt);
      // probability for a 50% signal fraction prior
      Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
      // NOTE(review): a negative return presumably flags that the probability
      // could not be computed (e.g. missing MVA PDFs) — verify. The loop then
      // stops, leaving the remaining entries at their Resize() defaults.
      if (proba < 0) break;
      mvaProb->SetValue( proba, ievt, DataInfo().IsSignal( Data()->GetEvent()) );

      // print progress
      Int_t modulo = Int_t(nEvents/100);
      if (modulo <= 0 ) modulo = 1;
      if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
   }

   Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
         << "Elapsed time for evaluation of " << nEvents << " events: "
         << timer.GetElapsedTime() << " " << Endl;
}
972
973////////////////////////////////////////////////////////////////////////////////
974/// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
975///
976/// - bias = average deviation
977/// - dev = average absolute deviation
978/// - rms = rms of deviation
979
981 Double_t& dev, Double_t& devT,
982 Double_t& rms, Double_t& rmsT,
983 Double_t& mInf, Double_t& mInfT,
984 Double_t& corr,
986{
987 Types::ETreeType savedType = Data()->GetCurrentType();
988 Data()->SetCurrentType(type);
989
990 bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
991 Double_t sumw = 0;
992 Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
993 const Int_t nevt = GetNEvents();
994 Float_t* rV = new Float_t[nevt];
995 Float_t* tV = new Float_t[nevt];
996 Float_t* wV = new Float_t[nevt];
997 Float_t xmin = 1e30, xmax = -1e30;
998 Log() << kINFO << "Calculate regression for all events" << Endl;
999 Timer timer( nevt, GetName(), kTRUE );
1000 for (Long64_t ievt=0; ievt<nevt; ievt++) {
1001
1002 const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
1003 Float_t t = ev->GetTarget(0);
1004 Float_t w = ev->GetWeight();
1005 Float_t r = GetRegressionValues()[0];
1006 Float_t d = (r-t);
1007
1008 // find min/max
1011
1012 // store for truncated RMS computation
1013 rV[ievt] = r;
1014 tV[ievt] = t;
1015 wV[ievt] = w;
1016
1017 // compute deviation-squared
1018 sumw += w;
1019 bias += w * d;
1020 dev += w * TMath::Abs(d);
1021 rms += w * d * d;
1022
1023 // compute correlation between target and regression estimate
1024 m1 += t*w; s1 += t*t*w;
1025 m2 += r*w; s2 += r*r*w;
1026 s12 += t*r;
1027 if ((ievt & 0xFF) == 0) timer.DrawProgressBar(ievt);
1028 }
1029 timer.DrawProgressBar(nevt - 1);
1030 Log() << kINFO << "Elapsed time for evaluation of " << nevt << " events: "
1031 << timer.GetElapsedTime() << " " << Endl;
1032
1033 // standard quantities
1034 bias /= sumw;
1035 dev /= sumw;
1036 rms /= sumw;
1037 rms = TMath::Sqrt(rms - bias*bias);
1038
1039 // correlation
1040 m1 /= sumw;
1041 m2 /= sumw;
1042 corr = s12/sumw - m1*m2;
1043 corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1044
1045 // create histogram required for computation of mutual information
1046 TH2F* hist = new TH2F( "hist", "hist", 150, xmin, xmax, 100, xmin, xmax );
1047 TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );
1048
1049 // compute truncated RMS and fill histogram
1050 Double_t devMax = bias + 2*rms;
1051 Double_t devMin = bias - 2*rms;
1052 sumw = 0;
1053 int ic=0;
1054 for (Long64_t ievt=0; ievt<nevt; ievt++) {
1055 Float_t d = (rV[ievt] - tV[ievt]);
1056 hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
1057 if (d >= devMin && d <= devMax) {
1058 sumw += wV[ievt];
1059 biasT += wV[ievt] * d;
1060 devT += wV[ievt] * TMath::Abs(d);
1061 rmsT += wV[ievt] * d * d;
1062 histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
1063 ic++;
1064 }
1065 }
1066 biasT /= sumw;
1067 devT /= sumw;
1068 rmsT /= sumw;
1069 rmsT = TMath::Sqrt(rmsT - biasT*biasT);
1070 mInf = gTools().GetMutualInformation( *hist );
1071 mInfT = gTools().GetMutualInformation( *histT );
1072
1073 delete hist;
1074 delete histT;
1075
1076 delete [] rV;
1077 delete [] tV;
1078 delete [] wV;
1079
1080 Data()->SetCurrentType(savedType);
1081}
1082
1083
1084////////////////////////////////////////////////////////////////////////////////
1085/// test multiclass classification
1086
1088{
1089 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
1090 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1091
1092 // GA evaluation of best cut for sig eff * sig pur. Slow, disabled for now.
1093 // Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test
1094 // data..." << Endl; for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1095 // resMulticlass->GetBestMultiClassCuts(icls);
1096 // }
1097
1098 // Create histograms for use in TMVA GUI
1099 TString histNamePrefix(GetTestvarName());
1100 TString histNamePrefixTest{histNamePrefix + "_Test"};
1101 TString histNamePrefixTrain{histNamePrefix + "_Train"};
1102
1103 resMulticlass->CreateMulticlassHistos(histNamePrefixTest, fNbinsMVAoutput, fNbinsH);
1104 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTest);
1105
1106 resMulticlass->CreateMulticlassHistos(histNamePrefixTrain, fNbinsMVAoutput, fNbinsH);
1107 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTrain);
1108}
1109
1110
1111////////////////////////////////////////////////////////////////////////////////
/// Fill the classifier-response test histograms (MVA distribution, optional
/// probability and rarity plots, high-binned efficiency histograms) for
/// signal and background, and build the response PDFs from them.

{
   Data()->SetCurrentType(Types::kTesting);

   ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
      ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );

   // sanity checks: tree must exist, and theVar must be in tree
   // (the Cuts method is exempt — it has no continuous test variable)
   if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
      Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
           << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
      Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
            << " not found in tree" << Endl;
   }

   // basic statistics operations are made in base class
   gTools().ComputeStat( GetEventCollection(Types::kTesting), mvaRes->GetValueVector(),
                         fMeanS, fMeanB, fRmsS, fRmsB, fXmin, fXmax, fSignalClass );

   // choose reasonable histogram ranges, by removing outliers
   Double_t nrms = 10;
   fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
   fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );

   // determine cut orientation
   fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;

   // fill 2 types of histograms for the various analyses
   // this one is for actual plotting

   // small offset so the histogram upper edge includes fXmax itself
   Double_t sxmax = fXmax+0.00001;

   // classifier response distributions for training sample
   // MVA plots used for graphics representation (signal)
   TString TestvarName;
   if(IsSilentFile())
   {
      // prefix the dataset name to keep histogram names unique in silent-file mode
      TestvarName=Form("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
   }else
   {
      TestvarName=GetTestvarName();
   }
   TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
   TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
   mvaRes->Store(mva_s, "MVA_S");
   mvaRes->Store(mva_b, "MVA_B");
   mva_s->Sumw2();
   mva_b->Sumw2();

   TH1* proba_s = 0;
   TH1* proba_b = 0;
   TH1* rarity_s = 0;
   TH1* rarity_b = 0;
   if (HasMVAPdfs()) {
      // P(MVA) plots used for graphics representation
      proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
      proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
      mvaRes->Store(proba_s, "Prob_S");
      mvaRes->Store(proba_b, "Prob_B");
      proba_s->Sumw2();
      proba_b->Sumw2();

      // R(MVA) plots used for graphics representation
      rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
      rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
      mvaRes->Store(rarity_s, "Rar_S");
      mvaRes->Store(rarity_b, "Rar_B");
      rarity_s->Sumw2();
      rarity_b->Sumw2();
   }

   // MVA plots used for efficiency calculations (large number of bins)
   TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
   TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
   mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
   mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
   mva_eff_s->Sumw2();
   mva_eff_b->Sumw2();

   // fill the histograms

   // probability results exist only if AddClassifierOutputProb ran for this method
   ResultsClassification* mvaProb = dynamic_cast<ResultsClassification*>
      (Data()->GetResults( TString("prob_")+GetMethodName(), Types::kTesting, Types::kMaxAnalysisType ) );

   Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
   if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
   //std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();

   //LM: this is needed to avoid crashes in ROOCCURVE
   if ( mvaRes->GetSize() != GetNEvents() ) {
      Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
      assert(mvaRes->GetSize() == GetNEvents());
   }

   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {

      const Event* ev = GetEvent(ievt);
      Float_t v = (*mvaRes)[ievt][0];
      Float_t w = ev->GetWeight();

      if (DataInfo().IsSignal(ev)) {
         //mvaResTypes->push_back(kTRUE);
         mva_s ->Fill( v, w );
         if (mvaProb) {
            proba_s->Fill( (*mvaProb)[ievt][0], w );
            rarity_s->Fill( GetRarity( v ), w );
         }

         mva_eff_s ->Fill( v, w );
      }
      else {
         //mvaResTypes->push_back(kFALSE);
         mva_b ->Fill( v, w );
         if (mvaProb) {
            proba_b->Fill( (*mvaProb)[ievt][0], w );
            rarity_b->Fill( GetRarity( v ), w );
         }
         mva_eff_b ->Fill( v, w );
      }
   }

   // uncomment those (and several others if you want unnormalized output
   gTools().NormHist( mva_s );
   gTools().NormHist( mva_b );
   gTools().NormHist( proba_s );
   gTools().NormHist( proba_b );
   gTools().NormHist( rarity_s );
   gTools().NormHist( rarity_b );
   gTools().NormHist( mva_eff_s );
   gTools().NormHist( mva_eff_b );

   // create PDFs from histograms, using default splines, and no additional smoothing
   if (fSplS) { delete fSplS; fSplS = 0; }
   if (fSplB) { delete fSplB; fSplB = 0; }
   fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
   fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
}
1251
1252////////////////////////////////////////////////////////////////////////////////
1253/// general method used in writing the header of the weight files where
1254/// the used variables, variable transformation type etc. is specified
1255
1256void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1257{
1258 TString prefix = "";
1259 UserGroup_t * userInfo = gSystem->GetUserInfo();
1260
1261 tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1262 tf << prefix << "Method : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1263 tf.setf(std::ios::left);
1264 tf << prefix << "TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
1265 << GetTrainingTMVAVersionCode() << "]" << std::endl;
1266 tf << prefix << "ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
1267 << GetTrainingROOTVersionCode() << "]" << std::endl;
1268 tf << prefix << "Creator : " << userInfo->fUser << std::endl;
1269 tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1270 tf << prefix << "Host : " << gSystem->GetBuildNode() << std::endl;
1271 tf << prefix << "Dir : " << gSystem->WorkingDirectory() << std::endl;
1272 tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1273
1274 TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1275
1276 tf << prefix << "Analysis type : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1277 tf << prefix << std::endl;
1278
1279 delete userInfo;
1280
1281 // First write all options
1282 tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1283 WriteOptionsToStream( tf, prefix );
1284 tf << prefix << std::endl;
1285
1286 // Second write variable info
1287 tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1288 WriteVarsToStream( tf, prefix );
1289 tf << prefix << std::endl;
1290}
1291
1292////////////////////////////////////////////////////////////////////////////////
1293/// xml writing
1294
1295void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1296{
1297 void* it = gTools().AddChild(gi,"Info");
1298 gTools().AddAttr(it,"name", name);
1299 gTools().AddAttr(it,"value", value);
1300}
1301
1302////////////////////////////////////////////////////////////////////////////////
1303
   // Dispatch to the analysis-specific output filler; for classification the
   // probability output is additionally filled when MVA PDFs are available.
   if (analysisType == Types::kRegression) {
      AddRegressionOutput( type );
   } else if (analysisType == Types::kMulticlass) {
      AddMulticlassOutput( type );
   } else {
      AddClassifierOutput( type );
      if (HasMVAPdfs())
         AddClassifierOutputProb( type );
   }
}
1315
1316////////////////////////////////////////////////////////////////////////////////
1317/// general method used in writing the header of the weight files where
1318/// the used variables, variable transformation type etc. is specified
1319
1320void TMVA::MethodBase::WriteStateToXML( void* parent ) const
1321{
1322 if (!parent) return;
1323
1324 UserGroup_t* userInfo = gSystem->GetUserInfo();
1325
1326 void* gi = gTools().AddChild(parent, "GeneralInfo");
1327 AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
1328 AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
1329 AddInfoItem( gi, "Creator", userInfo->fUser);
1330 TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
1331 AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
1332 AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
1333 AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
1334 AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));
1335
1336 Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
1337 TString analysisType((aType==Types::kRegression) ? "Regression" :
1338 (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
1339 AddInfoItem( gi, "AnalysisType", analysisType );
1340 delete userInfo;
1341
1342 // write options
1343 AddOptionsXMLTo( parent );
1344
1345 // write variable info
1346 AddVarsXMLTo( parent );
1347
1348 // write spectator info
1349 if (fModelPersistence)
1350 AddSpectatorsXMLTo( parent );
1351
1352 // write class info if in multiclass mode
1353 AddClassesXMLTo(parent);
1354
1355 // write target info if in regression mode
1356 if (DoRegression()) AddTargetsXMLTo(parent);
1357
1358 // write transformations
1359 GetTransformationHandler(false).AddXMLTo( parent );
1360
1361 // write MVA variable distributions
1362 void* pdfs = gTools().AddChild(parent, "MVAPdfs");
1363 if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1364 if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1365
1366 // write weights
1367 AddWeightsXMLTo( parent );
1368}
1369
1370////////////////////////////////////////////////////////////////////////////////
1371/// write reference MVA distributions (and other information)
1372/// to a ROOT type weight file
1373
1375{
1376 Bool_t addDirStatus = TH1::AddDirectoryStatus();
1377 TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
1378 fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1379 fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1380
1381 TH1::AddDirectory( addDirStatus );
1382
1383 ReadWeightsFromStream( rf );
1384
1385 SetTestvarName();
1386}
1387
1388////////////////////////////////////////////////////////////////////////////////
1389/// write options and weights to file
1390/// note that each one text file for the main configuration information
1391/// and one ROOT file for ROOT objects are created
1392
1394{
1395 // ---- create the text file
1396 TString tfname( GetWeightFileName() );
1397
1398 // writing xml file
1399 TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1400 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1401 << "Creating xml weight file: "
1402 << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1403 void* doc = gTools().xmlengine().NewDoc();
1404 void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1405 gTools().xmlengine().DocSetRootElement(doc,rootnode);
1406 gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1407 WriteStateToXML(rootnode);
1408 gTools().xmlengine().SaveDoc(doc,xmlfname);
1409 gTools().xmlengine().FreeDoc(doc);
1410}
1411
1412////////////////////////////////////////////////////////////////////////////////
1413/// Function to write options and weights to file
1414
1416{
1417 // get the filename
1418
1419 TString tfname(GetWeightFileName());
1420
1421 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1422 << "Reading weight file: "
1423 << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1424
1425 if (tfname.EndsWith(".xml") ) {
1426 void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1427 if (!doc) {
1428 Log() << kFATAL << "Error parsing XML file " << tfname << Endl;
1429 }
1430 void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1431 ReadStateFromXML(rootnode);
1432 gTools().xmlengine().FreeDoc(doc);
1433 }
1434 else {
1435 std::filebuf fb;
1436 fb.open(tfname.Data(),std::ios::in);
1437 if (!fb.is_open()) { // file not found --> Error
1438 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1439 << "Unable to open input weight file: " << tfname << Endl;
1440 }
1441 std::istream fin(&fb);
1442 ReadStateFromStream(fin);
1443 fb.close();
1444 }
1445 if (!fTxtWeightsOnly) {
1446 // ---- read the ROOT file
1447 TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1448 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1449 << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1450 TFile* rfile = TFile::Open( rfname, "READ" );
1451 ReadStateFromStream( *rfile );
1452 rfile->Close();
1453 }
1454}
1455////////////////////////////////////////////////////////////////////////////////
1456/// for reading from memory
1457
   // Parse the in-memory XML string, restore the method state from its root
   // node ("MethodSetup"), and free the temporary document again.
   void* doc = gTools().xmlengine().ParseString(xmlstr);
   void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
   ReadStateFromXML(rootnode);
   gTools().xmlengine().FreeDoc(doc);

   return;
}
1466
1467////////////////////////////////////////////////////////////////////////////////
1468
1470{
1471
1472 TString fullMethodName;
1473 gTools().ReadAttr( methodNode, "Method", fullMethodName );
1474
1475 fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1476
1477 // update logger
1478 Log().SetSource( GetName() );
1479 Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1480 << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1481
1482 // after the method name is read, the testvar can be set
1483 SetTestvarName();
1484
1485 TString nodeName("");
1486 void* ch = gTools().GetChild(methodNode);
1487 while (ch!=0) {
1488 nodeName = TString( gTools().GetName(ch) );
1489
1490 if (nodeName=="GeneralInfo") {
1491 // read analysis type
1492
1493 TString name(""),val("");
1494 void* antypeNode = gTools().GetChild(ch);
1495 while (antypeNode) {
1496 gTools().ReadAttr( antypeNode, "name", name );
1497
1498 if (name == "TrainingTime")
1499 gTools().ReadAttr( antypeNode, "value", fTrainTime );
1500
1501 if (name == "AnalysisType") {
1502 gTools().ReadAttr( antypeNode, "value", val );
1503 val.ToLower();
1504 if (val == "regression" ) SetAnalysisType( Types::kRegression );
1505 else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1506 else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1507 else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1508 }
1509
1510 if (name == "TMVA Release" || name == "TMVA") {
1511 TString s;
1512 gTools().ReadAttr( antypeNode, "value", s);
1513 fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1514 Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1515 }
1516
1517 if (name == "ROOT Release" || name == "ROOT") {
1518 TString s;
1519 gTools().ReadAttr( antypeNode, "value", s);
1520 fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1521 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1522 << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1523 }
1524 antypeNode = gTools().GetNextChild(antypeNode);
1525 }
1526 }
1527 else if (nodeName=="Options") {
1528 ReadOptionsFromXML(ch);
1529 ParseOptions();
1530
1531 }
1532 else if (nodeName=="Variables") {
1533 ReadVariablesFromXML(ch);
1534 }
1535 else if (nodeName=="Spectators") {
1536 ReadSpectatorsFromXML(ch);
1537 }
1538 else if (nodeName=="Classes") {
1539 if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1540 }
1541 else if (nodeName=="Targets") {
1542 if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1543 }
1544 else if (nodeName=="Transformations") {
1545 GetTransformationHandler().ReadFromXML(ch);
1546 }
1547 else if (nodeName=="MVAPdfs") {
1548 TString pdfname;
1549 if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1550 if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1551 void* pdfnode = gTools().GetChild(ch);
1552 if (pdfnode) {
1553 gTools().ReadAttr(pdfnode, "Name", pdfname);
1554 fMVAPdfS = new PDF(pdfname);
1555 fMVAPdfS->ReadXML(pdfnode);
1556 pdfnode = gTools().GetNextChild(pdfnode);
1557 gTools().ReadAttr(pdfnode, "Name", pdfname);
1558 fMVAPdfB = new PDF(pdfname);
1559 fMVAPdfB->ReadXML(pdfnode);
1560 }
1561 }
1562 else if (nodeName=="Weights") {
1563 ReadWeightsFromXML(ch);
1564 }
1565 else {
1566 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1567 }
1568 ch = gTools().GetNextChild(ch);
1569
1570 }
1571
1572 // update transformation handler
1573 if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1574}
1575
1576////////////////////////////////////////////////////////////////////////////////
1577/// read the header from the weight files of the different MVA methods
1578
1580{
1581 char buf[512];
1582
1583 // when reading from stream, we assume the files are produced with TMVA<=397
1584 SetAnalysisType(Types::kClassification);
1585
1586
1587 // first read the method name
1588 GetLine(fin,buf);
1589 while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1590 TString namestr(buf);
1591
1592 TString methodType = namestr(0,namestr.Index("::"));
1593 methodType = methodType(methodType.Last(' '),methodType.Length());
1594 methodType = methodType.Strip(TString::kLeading);
1595
1596 TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1597 methodName = methodName.Strip(TString::kLeading);
1598 if (methodName == "") methodName = methodType;
1599 fMethodName = methodName;
1600
1601 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1602
1603 // update logger
1604 Log().SetSource( GetName() );
1605
1606 // now the question is whether to read the variables first or the options (well, of course the order
1607 // of writing them needs to agree)
1608 //
1609 // the option "Decorrelation" is needed to decide if the variables we
1610 // read are decorrelated or not
1611 //
1612 // the variables are needed by some methods (TMLP) to build the NN
1613 // which is done in ProcessOptions so for the time being we first Read and Parse the options then
1614 // we read the variables, and then we process the options
1615
1616 // now read all options
1617 GetLine(fin,buf);
1618 while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1619 ReadOptionsFromStream(fin);
1620 ParseOptions();
1621
1622 // Now read variable info
1623 fin.getline(buf,512);
1624 while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1625 ReadVarsFromStream(fin);
1626
1627 // now we process the options (of the derived class)
1628 ProcessOptions();
1629
1630 if (IsNormalised()) {
1632 GetTransformationHandler().AddTransformation( new VariableNormalizeTransform(DataInfo()), -1 );
1633 norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1634 }
1635 VariableTransformBase *varTrafo(0), *varTrafo2(0);
1636 if ( fVarTransformString == "None") {
1637 if (fUseDecorr)
1638 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1639 } else if ( fVarTransformString == "Decorrelate" ) {
1640 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1641 } else if ( fVarTransformString == "PCA" ) {
1642 varTrafo = GetTransformationHandler().AddTransformation( new VariablePCATransform(DataInfo()), -1 );
1643 } else if ( fVarTransformString == "Uniform" ) {
1644 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1645 } else if ( fVarTransformString == "Gauss" ) {
1646 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1647 } else if ( fVarTransformString == "GaussDecorr" ) {
1648 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1649 varTrafo2 = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1650 } else {
1651 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1652 << fVarTransformString << "' unknown." << Endl;
1653 }
1654 // Now read decorrelation matrix if available
1655 if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1656 fin.getline(buf,512);
1657 while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1658 if (varTrafo) {
1659 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1660 varTrafo->ReadTransformationFromStream(fin, trafo );
1661 }
1662 if (varTrafo2) {
1663 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1664 varTrafo2->ReadTransformationFromStream(fin, trafo );
1665 }
1666 }
1667
1668
1669 if (HasMVAPdfs()) {
1670 // Now read the MVA PDFs
1671 fin.getline(buf,512);
1672 while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1673 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1674 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1675 fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1676 fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1677 fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
1678 fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
1679
1680 fin >> *fMVAPdfS;
1681 fin >> *fMVAPdfB;
1682 }
1683
1684 // Now read weights
1685 fin.getline(buf,512);
1686 while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1687 fin.getline(buf,512);
1688 ReadWeightsFromStream( fin );;
1689
1690 // update transformation handler
1691 if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1692
1693}
1694
1695////////////////////////////////////////////////////////////////////////////////
1696/// write the list of variables (name, min, max) for a given data
1697/// transformation method to the stream
1698
1699void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1700{
1701 o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1702 std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1703 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1704 o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1705 varIt = DataInfo().GetSpectatorInfos().begin();
1706 for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1707}
1708
1709////////////////////////////////////////////////////////////////////////////////
1710/// Read the variables (name, min, max) for a given data
1711/// transformation method from the stream. In the stream we only
1712/// expect the limits which will be set
1713
1715{
1716 TString dummy;
1717 UInt_t readNVar;
1718 istr >> dummy >> readNVar;
1719
1720 if (readNVar!=DataInfo().GetNVariables()) {
1721 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1722 << " while there are " << readNVar << " variables declared in the file"
1723 << Endl;
1724 }
1725
1726 // we want to make sure all variables are read in the order they are defined
1727 VariableInfo varInfo;
1728 std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1729 int varIdx = 0;
1730 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
1731 varInfo.ReadFromStream(istr);
1732 if (varIt->GetExpression() == varInfo.GetExpression()) {
1733 varInfo.SetExternalLink((*varIt).GetExternalLink());
1734 (*varIt) = varInfo;
1735 }
1736 else {
1737 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1738 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1739 Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1740 Log() << kINFO << "the correct working of the method):" << Endl;
1741 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1742 Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1743 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1744 }
1745 }
1746}
1747
1748////////////////////////////////////////////////////////////////////////////////
1749/// write variable info to XML
1750
1751void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1752{
1753 void* vars = gTools().AddChild(parent, "Variables");
1754 gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1755
1756 for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1757 VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1758 void* var = gTools().AddChild( vars, "Variable" );
1759 gTools().AddAttr( var, "VarIndex", idx );
1760 vi.AddToXML( var );
1761 }
1762}
1763
1764////////////////////////////////////////////////////////////////////////////////
1765/// write spectator info to XML
1766
1768{
1769 void* specs = gTools().AddChild(parent, "Spectators");
1770
1771 UInt_t writeIdx=0;
1772 for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1773
1774 VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1775
1776 // we do not want to write spectators that are category-cuts,
1777 // except if the method is the category method and the spectators belong to it
1778 if (vi.GetVarType()=='C') continue;
1779
1780 void* spec = gTools().AddChild( specs, "Spectator" );
1781 gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1782 vi.AddToXML( spec );
1783 }
1784 gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1785}
1786
1787////////////////////////////////////////////////////////////////////////////////
1788/// write class info to XML
1789
1790void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1791{
1792 UInt_t nClasses=DataInfo().GetNClasses();
1793
1794 void* classes = gTools().AddChild(parent, "Classes");
1795 gTools().AddAttr( classes, "NClass", nClasses );
1796
1797 for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1798 ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1799 TString className =classInfo->GetName();
1800 UInt_t classNumber=classInfo->GetNumber();
1801
1802 void* classNode=gTools().AddChild(classes, "Class");
1803 gTools().AddAttr( classNode, "Name", className );
1804 gTools().AddAttr( classNode, "Index", classNumber );
1805 }
1806}
1807////////////////////////////////////////////////////////////////////////////////
1808/// write target info to XML
1809
1810void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1811{
1812 void* targets = gTools().AddChild(parent, "Targets");
1813 gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1814
1815 for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1816 VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1817 void* tar = gTools().AddChild( targets, "Target" );
1818 gTools().AddAttr( tar, "TargetIndex", idx );
1819 vi.AddToXML( tar );
1820 }
1821}
1822
1823////////////////////////////////////////////////////////////////////////////////
1824/// read variable info from XML
1825
1827{
1828 UInt_t readNVar;
1829 gTools().ReadAttr( varnode, "NVar", readNVar);
1830
1831 if (readNVar!=DataInfo().GetNVariables()) {
1832 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1833 << " while there are " << readNVar << " variables declared in the file"
1834 << Endl;
1835 }
1836
1837 // we want to make sure all variables are read in the order they are defined
1838 VariableInfo readVarInfo, existingVarInfo;
1839 int varIdx = 0;
1840 void* ch = gTools().GetChild(varnode);
1841 while (ch) {
1842 gTools().ReadAttr( ch, "VarIndex", varIdx);
1843 existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1844 readVarInfo.ReadFromXML(ch);
1845
1846 if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1847 readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
1848 existingVarInfo = readVarInfo;
1849 }
1850 else {
1851 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1852 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1853 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1854 Log() << kINFO << "correct working of the method):" << Endl;
1855 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1856 Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1857 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1858 }
1859 ch = gTools().GetNextChild(ch);
1860 }
1861}
1862
1863////////////////////////////////////////////////////////////////////////////////
1864/// read spectator info from XML
1865
1867{
1868 UInt_t readNSpec;
1869 gTools().ReadAttr( specnode, "NSpec", readNSpec);
1870
1871 if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1872 Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1873 << " while there are " << readNSpec << " spectators declared in the file"
1874 << Endl;
1875 }
1876
1877 // we want to make sure all variables are read in the order they are defined
1878 VariableInfo readSpecInfo, existingSpecInfo;
1879 int specIdx = 0;
1880 void* ch = gTools().GetChild(specnode);
1881 while (ch) {
1882 gTools().ReadAttr( ch, "SpecIndex", specIdx);
1883 existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1884 readSpecInfo.ReadFromXML(ch);
1885
1886 if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1887 readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
1888 existingSpecInfo = readSpecInfo;
1889 }
1890 else {
1891 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1892 Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1893 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1894 Log() << kINFO << "correct working of the method):" << Endl;
1895 Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1896 Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1897 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1898 }
1899 ch = gTools().GetNextChild(ch);
1900 }
1901}
1902
1903////////////////////////////////////////////////////////////////////////////////
1904/// read number of classes from XML
1905
1907{
1908 UInt_t readNCls;
1909 // coverity[tainted_data_argument]
1910 gTools().ReadAttr( clsnode, "NClass", readNCls);
1911
1912 TString className="";
1913 UInt_t classIndex=0;
1914 void* ch = gTools().GetChild(clsnode);
1915 if (!ch) {
1916 for (UInt_t icls = 0; icls<readNCls;++icls) {
1917 TString classname = Form("class%i",icls);
1918 DataInfo().AddClass(classname);
1919
1920 }
1921 }
1922 else{
1923 while (ch) {
1924 gTools().ReadAttr( ch, "Index", classIndex);
1925 gTools().ReadAttr( ch, "Name", className );
1926 DataInfo().AddClass(className);
1927
1928 ch = gTools().GetNextChild(ch);
1929 }
1930 }
1931
1932 // retrieve signal and background class index
1933 if (DataInfo().GetClassInfo("Signal") != 0) {
1934 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1935 }
1936 else
1937 fSignalClass=0;
1938 if (DataInfo().GetClassInfo("Background") != 0) {
1939 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
1940 }
1941 else
1942 fBackgroundClass=1;
1943}
1944
1945////////////////////////////////////////////////////////////////////////////////
1946/// read target info from XML
1947
1949{
1950 UInt_t readNTar;
1951 gTools().ReadAttr( tarnode, "NTrgt", readNTar);
1952
1953 int tarIdx = 0;
1954 TString expression;
1955 void* ch = gTools().GetChild(tarnode);
1956 while (ch) {
1957 gTools().ReadAttr( ch, "TargetIndex", tarIdx);
1958 gTools().ReadAttr( ch, "Expression", expression);
1959 DataInfo().AddTarget(expression,"","",0,0);
1960
1961 ch = gTools().GetNextChild(ch);
1962 }
1963}
1964
1965////////////////////////////////////////////////////////////////////////////////
1966/// returns the ROOT directory where info/histograms etc of the
1967/// corresponding MVA method instance are stored
1968
1970{
1971 if (fBaseDir != 0) return fBaseDir;
1972 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
1973
1974 if (IsSilentFile()) {
1975 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
1976 << "MethodBase::BaseDir() - No directory exists when running a Method without output file. Enable the "
1977 "output when creating the factory"
1978 << Endl;
1979 }
1980
1981 TDirectory* methodDir = MethodBaseDir();
1982 if (methodDir==0)
1983 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" << Endl;
1984
1985 TString defaultDir = GetMethodName();
1986 TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
1987 if(!sdir)
1988 {
1989 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet--> created it" <<Endl;
1990 sdir = methodDir->mkdir(defaultDir);
1991 sdir->cd();
1992 // write weight file name into target file
1993 if (fModelPersistence) {
1994 TObjString wfilePath( gSystem->WorkingDirectory() );
1995 TObjString wfileName( GetWeightFileName() );
1996 wfilePath.Write( "TrainingPath" );
1997 wfileName.Write( "WeightFileName" );
1998 }
1999 }
2000
2001 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
2002 return sdir;
2003}
2004
2005////////////////////////////////////////////////////////////////////////////////
2006/// returns the ROOT directory where all instances of the
2007/// corresponding MVA method are stored
2008
2010{
2011 if (fMethodBaseDir != 0) {
2012 return fMethodBaseDir;
2013 }
2014
2015 const char *datasetName = DataInfo().GetName();
2016
2017 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodTypeName()
2018 << " not set yet --> check if already there.." << Endl;
2019
2020 TDirectory *factoryBaseDir = GetFile();
2021 if (!factoryBaseDir) return nullptr;
2022 fMethodBaseDir = factoryBaseDir->GetDirectory(datasetName);
2023 if (!fMethodBaseDir) {
2024 fMethodBaseDir = factoryBaseDir->mkdir(datasetName, Form("Base directory for dataset %s", datasetName));
2025 if (!fMethodBaseDir) {
2026 Log() << kFATAL << "Can not create dir " << datasetName;
2027 }
2028 }
2029 TString methodTypeDir = Form("Method_%s", GetMethodTypeName().Data());
2030 fMethodBaseDir = fMethodBaseDir->GetDirectory(methodTypeDir.Data());
2031
2032 if (!fMethodBaseDir) {
2033 TDirectory *datasetDir = factoryBaseDir->GetDirectory(datasetName);
2034 TString methodTypeDirHelpStr = Form("Directory for all %s methods", GetMethodTypeName().Data());
2035 fMethodBaseDir = datasetDir->mkdir(methodTypeDir.Data(), methodTypeDirHelpStr);
2036 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodName()
2037 << " does not exist yet--> created it" << Endl;
2038 }
2039
2040 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName)
2041 << "Return from MethodBaseDir() after creating base directory " << Endl;
2042 return fMethodBaseDir;
2043}
2044
2045////////////////////////////////////////////////////////////////////////////////
2046/// set directory of weight file
2047
2049{
2050 fFileDir = fileDir;
2051 gSystem->mkdir( fFileDir, kTRUE );
2052}
2053
2054////////////////////////////////////////////////////////////////////////////////
2056 /// set the weight file name (deprecated)
2056
2058{
2059 fWeightFile = theWeightFile;
2060}
2061
2062////////////////////////////////////////////////////////////////////////////////
2063/// retrieve weight file name
2064
2066{
2067 if (fWeightFile!="") return fWeightFile;
2068
2069 // the default consists of
2070 // directory/jobname_methodname_suffix.extension.{root/txt}
2071 TString suffix = "";
2072 TString wFileDir(GetWeightFileDir());
2073 TString wFileName = GetJobName() + "_" + GetMethodName() +
2074 suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml";
2075 if (wFileDir.IsNull() ) return wFileName;
2076 // add weight file directory of it is not null
2077 return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2078 + wFileName );
2079}
2080////////////////////////////////////////////////////////////////////////////////
2081/// writes all MVA evaluation histograms to file
2082
2084{
2085 BaseDir()->cd();
2086
2087
2088 // write MVA PDFs to file - if exist
2089 if (0 != fMVAPdfS) {
2090 fMVAPdfS->GetOriginalHist()->Write();
2091 fMVAPdfS->GetSmoothedHist()->Write();
2092 fMVAPdfS->GetPDFHist()->Write();
2093 }
2094 if (0 != fMVAPdfB) {
2095 fMVAPdfB->GetOriginalHist()->Write();
2096 fMVAPdfB->GetSmoothedHist()->Write();
2097 fMVAPdfB->GetPDFHist()->Write();
2098 }
2099
2100 // write result-histograms
2101 Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2102 if (!results)
2103 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2104 << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2105 << "/kMaxAnalysisType" << Endl;
2106 results->GetStorage()->Write();
2107 if (treetype==Types::kTesting) {
2108 // skipping plotting of variables if too many (default is 200)
2109 if ((int) DataInfo().GetNVariables()< gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables)
2110 GetTransformationHandler().PlotVariables (GetEventCollection( Types::kTesting ), BaseDir() );
2111 else
2112 Log() << kINFO << TString::Format("Dataset[%s] : ",DataInfo().GetName())
2113 << " variable plots are not produces ! The number of variables is " << DataInfo().GetNVariables()
2114 << " , it is larger than " << gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables << Endl;
2115 }
2116}
2117
2118////////////////////////////////////////////////////////////////////////////////
2119/// write special monitoring histograms to file
2120/// dummy implementation here -----------------
2121
2123{
2124}
2125
2126////////////////////////////////////////////////////////////////////////////////
2127/// reads one line from the input stream
2128/// checks for certain keywords and interprets
2129/// the line if keywords are found
2130
2131Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2132{
2133 fin.getline(buf,512);
2134 TString line(buf);
2135 if (line.BeginsWith("TMVA Release")) {
2136 Ssiz_t start = line.First('[')+1;
2137 Ssiz_t length = line.Index("]",start)-start;
2138 TString code = line(start,length);
2139 std::stringstream s(code.Data());
2140 s >> fTMVATrainingVersion;
2141 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2142 }
2143 if (line.BeginsWith("ROOT Release")) {
2144 Ssiz_t start = line.First('[')+1;
2145 Ssiz_t length = line.Index("]",start)-start;
2146 TString code = line(start,length);
2147 std::stringstream s(code.Data());
2148 s >> fROOTTrainingVersion;
2149 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2150 }
2151 if (line.BeginsWith("Analysis type")) {
2152 Ssiz_t start = line.First('[')+1;
2153 Ssiz_t length = line.Index("]",start)-start;
2154 TString code = line(start,length);
2155 std::stringstream s(code.Data());
2156 std::string analysisType;
2157 s >> analysisType;
2158 if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2159 else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2160 else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2161 else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2162
2163 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2164 << (GetAnalysisType() == Types::kRegression ? "Regression" :
2165 (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2166 }
2167
2168 return true;
2169}
2170
2171////////////////////////////////////////////////////////////////////////////////
2172/// Create PDFs of the MVA output variables
2173
2175{
2176 Data()->SetCurrentType(Types::kTraining);
2177
2178 // the PDF's are stored as results ONLY if the corresponding "results" are booked,
2179 // otherwise they will be only used 'online'
2180 ResultsClassification * mvaRes = dynamic_cast<ResultsClassification*>
2181 ( Data()->GetResults(GetMethodName(), Types::kTraining, Types::kClassification) );
2182
2183 if (mvaRes==0 || mvaRes->GetSize()==0) {
2184 Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2185 }
2186
2187 Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2188 Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2189
2190 // create histograms that serve as basis to create the MVA Pdfs
2191 TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2192 fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2193 TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2194 fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2195
2196
2197 // compute sum of weights properly
2198 histMVAPdfS->Sumw2();
2199 histMVAPdfB->Sumw2();
2200
2201 // fill histograms
2202 for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2203 Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2204 Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2205
2206 if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2207 else histMVAPdfB->Fill( theVal, theWeight );
2208 }
2209
2210 gTools().NormHist( histMVAPdfS );
2211 gTools().NormHist( histMVAPdfB );
2212
2213 // momentary hack for ROOT problem
2214 if(!IsSilentFile())
2215 {
2216 histMVAPdfS->Write();
2217 histMVAPdfB->Write();
2218 }
2219 // create PDFs
2220 fMVAPdfS->BuildPDF ( histMVAPdfS );
2221 fMVAPdfB->BuildPDF ( histMVAPdfB );
2222 fMVAPdfS->ValidatePDF( histMVAPdfS );
2223 fMVAPdfB->ValidatePDF( histMVAPdfB );
2224
2225 if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2226 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2227 << Form( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2228 GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2229 << Endl;
2230 }
2231
2232 delete histMVAPdfS;
2233 delete histMVAPdfB;
2234}
2235
   // the simple one, automatically calculates the mvaVal and uses the
   // SAME sig/bkg ratio as given in the training sample (typically 50/50
   // .. (NormMode=EqualNumEvents) but can be different)
   if (!fMVAPdfS || !fMVAPdfB) {
      Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
      CreateMVAPdfs();
   }
   // prior signal fraction taken from the training-sample weight sums
   Double_t sigFraction = DataInfo().GetTrainingSumSignalWeights() / (DataInfo().GetTrainingSumSignalWeights() + DataInfo().GetTrainingSumBackgrWeights() );
   // evaluate the classifier for this event and convert to a probability
   Double_t mvaVal = GetMvaValue(ev);

   return GetProba(mvaVal,sigFraction);

}
2250////////////////////////////////////////////////////////////////////////////////
2251/// compute likelihood ratio
2252
2254{
2255 if (!fMVAPdfS || !fMVAPdfB) {
2256 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2257 return -1.0;
2258 }
2259 Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2260 Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2261
2262 Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2263
2264 return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2265}
2266
2267////////////////////////////////////////////////////////////////////////////////
2268/// compute rarity:
2269/// \f[
2270/// R(x) = \int_{[-\infty..x]} { PDF(x') dx' }
2271/// \f]
2272/// where PDF(x) is the PDF of the classifier's signal or background distribution
2273
2275{
2276 if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2277 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2278 << "select option \"CreateMVAPdfs\"" << Endl;
2279 return 0.0;
2280 }
2281
2282 PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2283
2284 return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2285}
2286
2287////////////////////////////////////////////////////////////////////////////////
2288/// fill background efficiency (resp. rejection) versus signal efficiency plots
2289/// returns signal efficiency at background efficiency indicated in theString
2290
2292{
2293 Data()->SetCurrentType(type);
2294 Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
2295 std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2296
2297 // parse input string for required background efficiency
2298 TList* list = gTools().ParseFormatLine( theString );
2299
2300 // sanity check
2301 Bool_t computeArea = kFALSE;
2302 if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2303 else if (list->GetSize() > 2) {
2304 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2305 << " in string: " << theString
2306 << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2307 delete list;
2308 return -1;
2309 }
2310
2311 // sanity check
2312 if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2313 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2314 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2315 delete list;
2316 return -1.0;
2317 }
2318
2319 // create histograms
2320
2321 // first, get efficiency histograms for signal and background
2322 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2323 Double_t xmin = effhist->GetXaxis()->GetXmin();
2324 Double_t xmax = effhist->GetXaxis()->GetXmax();
2325
2326 TTHREAD_TLS(Double_t) nevtS;
2327
2328 // first round ? --> create histograms
2329 if (results->DoesExist("MVA_EFF_S")==0) {
2330
2331 // for efficiency plot
2332 TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2333 TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2334 results->Store(eff_s, "MVA_EFF_S");
2335 results->Store(eff_b, "MVA_EFF_B");
2336
2337 // sign if cut
2338 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2339
2340 // this method is unbinned
2341 nevtS = 0;
2342 for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2343
2344 // read the tree
2345 Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2346 Float_t theWeight = GetEvent(ievt)->GetWeight();
2347 Float_t theVal = (*mvaRes)[ievt];
2348
2349 // select histogram depending on if sig or bgd
2350 TH1* theHist = isSignal ? eff_s : eff_b;
2351
2352 // count signal and background events in tree
2353 if (isSignal) nevtS+=theWeight;
2354
2355 TAxis* axis = theHist->GetXaxis();
2356 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2357 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2358 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2359 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2360 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2361
2362 if (sign > 0)
2363 for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2364 else if (sign < 0)
2365 for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2366 else
2367 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2368 }
2369
2370 // renormalise maximum to <=1
2371 // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2372 // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2373
2374 eff_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_s->GetMaximum()) );
2375 eff_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_b->GetMaximum()) );
2376
2377 // background efficiency versus signal efficiency
2378 TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2379 results->Store(eff_BvsS, "MVA_EFF_BvsS");
2380 eff_BvsS->SetXTitle( "Signal eff" );
2381 eff_BvsS->SetYTitle( "Backgr eff" );
2382
2383 // background rejection (=1-eff.) versus signal efficiency
2384 TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2385 results->Store(rej_BvsS);
2386 rej_BvsS->SetXTitle( "Signal eff" );
2387 rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2388
2389 // inverse background eff (1/eff.) versus signal efficiency
2390 TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2391 GetTestvarName(), fNbins, 0, 1 );
2392 results->Store(inveff_BvsS);
2393 inveff_BvsS->SetXTitle( "Signal eff" );
2394 inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2395
2396 // use root finder
2397 // spline background efficiency plot
2398 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2400 fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2401 fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2402
2403 // verify spline sanity
2404 gTools().CheckSplines( eff_s, fSplRefS );
2405 gTools().CheckSplines( eff_b, fSplRefB );
2406 }
2407
2408 // make the background-vs-signal efficiency plot
2409
2410 // create root finder
2411 RootFinder rootFinder( this, fXmin, fXmax );
2412
2413 Double_t effB = 0;
2414 fEffS = eff_s; // to be set for the root finder
2415 for (Int_t bini=1; bini<=fNbins; bini++) {
2416
2417 // find cut value corresponding to a given signal efficiency
2418 Double_t effS = eff_BvsS->GetBinCenter( bini );
2419 Double_t cut = rootFinder.Root( effS );
2420
2421 // retrieve background efficiency for given cut
2422 if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2423 else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2424
2425 // and fill histograms
2426 eff_BvsS->SetBinContent( bini, effB );
2427 rej_BvsS->SetBinContent( bini, 1.0-effB );
2428 if (effB>std::numeric_limits<double>::epsilon())
2429 inveff_BvsS->SetBinContent( bini, 1.0/effB );
2430 }
2431
2432 // create splines for histogram
2433 fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2434
2435 // search for overlap point where, when cutting on it,
2436 // one would obtain: eff_S = rej_B = 1 - eff_B
2437 Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2438 Int_t nbins_ = 5000;
2439 for (Int_t bini=1; bini<=nbins_; bini++) {
2440
2441 // get corresponding signal and background efficiencies
2442 effS = (bini - 0.5)/Float_t(nbins_);
2443 rejB = 1.0 - fSpleffBvsS->Eval( effS );
2444
2445 // find signal efficiency that corresponds to required background efficiency
2446 if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2447 effS_ = effS;
2448 rejB_ = rejB;
2449 }
2450
2451 // find cut that corresponds to signal efficiency and update signal-like criterion
2452 Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2453 SetSignalReferenceCut( cut );
2454 fEffS = 0;
2455 }
2456
2457 // must exist...
2458 if (0 == fSpleffBvsS) {
2459 delete list;
2460 return 0.0;
2461 }
2462
2463 // now find signal efficiency that corresponds to required background efficiency
2464 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2465 Int_t nbins_ = 1000;
2466
2467 if (computeArea) {
2468
2469 // compute area of rej-vs-eff plot
2470 Double_t integral = 0;
2471 for (Int_t bini=1; bini<=nbins_; bini++) {
2472
2473 // get corresponding signal and background efficiencies
2474 effS = (bini - 0.5)/Float_t(nbins_);
2475 effB = fSpleffBvsS->Eval( effS );
2476 integral += (1.0 - effB);
2477 }
2478 integral /= nbins_;
2479
2480 delete list;
2481 return integral;
2482 }
2483 else {
2484
2485       // that will be the value of the efficiency returned (it does not affect
2486       // the efficiency-vs-bkg plot, which is done anyway).
2487 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2488
2489 // find precise efficiency value
2490 for (Int_t bini=1; bini<=nbins_; bini++) {
2491
2492 // get corresponding signal and background efficiencies
2493 effS = (bini - 0.5)/Float_t(nbins_);
2494 effB = fSpleffBvsS->Eval( effS );
2495
2496 // find signal efficiency that corresponds to required background efficiency
2497 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2498 effS_ = effS;
2499 effB_ = effB;
2500 }
2501
2502 // take mean between bin above and bin below
2503 effS = 0.5*(effS + effS_);
2504
2505 effSerr = 0;
2506 if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2507
2508 delete list;
2509 return effS;
2510 }
2511
2512 return -1;
2513}
2514
2515////////////////////////////////////////////////////////////////////////////////
2516
2518{
2519 Data()->SetCurrentType(Types::kTraining);
2520
2521 Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, Types::kNoAnalysisType);
2522
2523 // fill background efficiency (resp. rejection) versus signal efficiency plots
2524 // returns signal efficiency at background efficiency indicated in theString
2525
2526 // parse input string for required background efficiency
2527 TList* list = gTools().ParseFormatLine( theString );
2528 // sanity check
2529
2530 if (list->GetSize() != 2) {
2531 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2532 << " in string: " << theString
2533 << " | required format, e.g., Efficiency:0.05" << Endl;
2534 delete list;
2535 return -1;
2536 }
2537 // that will be the value of the efficiency retured (does not affect
2538 // the efficiency-vs-bkg plot which is done anyway.
2539 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2540
2541 delete list;
2542
2543 // sanity check
2544 if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2545 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2546 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2547 << Endl;
2548 return -1.0;
2549 }
2550
2551 // create histogram
2552
2553 // first, get efficiency histograms for signal and background
2554 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2555 Double_t xmin = effhist->GetXaxis()->GetXmin();
2556 Double_t xmax = effhist->GetXaxis()->GetXmax();
2557
2558 // first round ? --> create and fill histograms
2559 if (results->DoesExist("MVA_TRAIN_S")==0) {
2560
2561 // classifier response distributions for test sample
2562 Double_t sxmax = fXmax+0.00001;
2563
2564 // MVA plots on the training sample (check for overtraining)
2565 TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2566 TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2567 results->Store(mva_s_tr, "MVA_TRAIN_S");
2568 results->Store(mva_b_tr, "MVA_TRAIN_B");
2569 mva_s_tr->Sumw2();
2570 mva_b_tr->Sumw2();
2571
2572 // Training efficiency plots
2573 TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2574 fNbinsH, xmin, xmax );
2575 TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2576 fNbinsH, xmin, xmax );
2577 results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2578 results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2579
2580 // sign if cut
2581 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2582
2583 std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2584 assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2585
2586 // this method is unbinned
2587 for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2588
2589 Data()->SetCurrentEvent(ievt);
2590 const Event* ev = GetEvent();
2591
2592 Double_t theVal = mvaValues[ievt];
2593 Double_t theWeight = ev->GetWeight();
2594
2595 TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2596 TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2597
2598 theClsHist->Fill( theVal, theWeight );
2599
2600 TAxis* axis = theEffHist->GetXaxis();
2601 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2602 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2603 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2604 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2605 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2606
2607 if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2608 else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2609 }
2610
2611 // normalise output distributions
2612 // uncomment those (and several others if you want unnormalized output
2613 gTools().NormHist( mva_s_tr );
2614 gTools().NormHist( mva_b_tr );
2615
2616 // renormalise to maximum
2617 mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2618 mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2619
2620 // Training background efficiency versus signal efficiency
2621 TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2622 // Training background rejection (=1-eff.) versus signal efficiency
2623 TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2624 results->Store(eff_bvss, "EFF_BVSS_TR");
2625 results->Store(rej_bvss, "REJ_BVSS_TR");
2626
2627 // use root finder
2628 // spline background efficiency plot
2629 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2631 if (fSplTrainRefS) delete fSplTrainRefS;
2632 if (fSplTrainRefB) delete fSplTrainRefB;
2633 fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2634 fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2635
2636 // verify spline sanity
2637 gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2638 gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2639 }
2640
2641 // make the background-vs-signal efficiency plot
2642
2643 // create root finder
2644 RootFinder rootFinder(this, fXmin, fXmax );
2645
2646 Double_t effB = 0;
2647 fEffS = results->GetHist("MVA_TRAINEFF_S");
2648 for (Int_t bini=1; bini<=fNbins; bini++) {
2649
2650 // find cut value corresponding to a given signal efficiency
2651 Double_t effS = eff_bvss->GetBinCenter( bini );
2652
2653 Double_t cut = rootFinder.Root( effS );
2654
2655 // retrieve background efficiency for given cut
2656 if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2657 else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2658
2659 // and fill histograms
2660 eff_bvss->SetBinContent( bini, effB );
2661 rej_bvss->SetBinContent( bini, 1.0-effB );
2662 }
2663 fEffS = 0;
2664
2665 // create splines for histogram
2666 fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2667 }
2668
2669 // must exist...
2670 if (0 == fSplTrainEffBvsS) return 0.0;
2671
2672 // now find signal efficiency that corresponds to required background efficiency
2673 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2674 Int_t nbins_ = 1000;
2675 for (Int_t bini=1; bini<=nbins_; bini++) {
2676
2677 // get corresponding signal and background efficiencies
2678 effS = (bini - 0.5)/Float_t(nbins_);
2679 effB = fSplTrainEffBvsS->Eval( effS );
2680
2681 // find signal efficiency that corresponds to required background efficiency
2682 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2683 effS_ = effS;
2684 effB_ = effB;
2685 }
2686
2687 return 0.5*(effS + effS_); // the mean between bin above and bin below
2688}
2689
2690////////////////////////////////////////////////////////////////////////////////
2691
2692std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2693{
2694 Data()->SetCurrentType(Types::kTesting);
2695 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
2696 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2697
2698 purity.push_back(resMulticlass->GetAchievablePur());
2699 return resMulticlass->GetAchievableEff();
2700}
2701
2702////////////////////////////////////////////////////////////////////////////////
2703
2704std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2705{
2706 Data()->SetCurrentType(Types::kTraining);
2707 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMulticlass));
2708 if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2709
2710 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2711 for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2712 resMulticlass->GetBestMultiClassCuts(icls);
2713 }
2714
2715 purity.push_back(resMulticlass->GetAchievablePur());
2716 return resMulticlass->GetAchievableEff();
2717}
2718
2719////////////////////////////////////////////////////////////////////////////////
2720/// Construct a confusion matrix for a multiclass classifier. The confusion
2721 /// matrix compares, in turn, each class against all other classes in a pair-wise
2722/// fashion. In rows with index \f$ k_r = 0 ... K \f$, \f$ k_r \f$ is
2723/// considered signal for the sake of comparison and for each column
2724/// \f$ k_c = 0 ... K \f$ the corresponding class is considered background.
2725///
2726/// Note that the diagonal elements will be returned as NaN since this will
2727/// compare a class against itself.
2728///
2729/// \see TMVA::ResultsMulticlass::GetConfusionMatrix
2730///
2731/// \param[in] effB The background efficiency for which to evaluate.
2732/// \param[in] type The data set on which to evaluate (training, testing ...).
2733///
2734/// \return A matrix containing signal efficiencies for the given background
2735/// efficiency. The diagonal elements are NaN since this measure is
2736/// meaningless (comparing a class against itself).
2737///
2738
2740{
2741 if (GetAnalysisType() != Types::kMulticlass) {
2742 Log() << kFATAL << "Cannot get confusion matrix for non-multiclass analysis." << std::endl;
2743 return TMatrixD(0, 0);
2744 }
2745
2746 Data()->SetCurrentType(type);
2747 ResultsMulticlass *resMulticlass =
2748 dynamic_cast<ResultsMulticlass *>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
2749
2750 if (resMulticlass == nullptr) {
2751 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2752 << "unable to create pointer in GetMulticlassEfficiency, exiting." << Endl;
2753 return TMatrixD(0, 0);
2754 }
2755
2756 return resMulticlass->GetConfusionMatrix(effB);
2757}
2758
2759////////////////////////////////////////////////////////////////////////////////
2760/// compute significance of mean difference
2761/// \f[
2762 /// significance = \frac{|<S> - <B>|}{\sqrt{RMS_{S}^{2} + RMS_{B}^{2}}}
2763/// \f]
2764
2766{
2767 Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2768
2769 return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2770}
2771
2772////////////////////////////////////////////////////////////////////////////////
2773/// compute "separation" defined as
2774/// \f[
2775 /// \langle S^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2776/// \f]
2777
2779{
2780 return gTools().GetSeparation( histoS, histoB );
2781}
2782
2783////////////////////////////////////////////////////////////////////////////////
2784/// compute "separation" defined as
2785/// \f[
2786 /// \langle S^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2787/// \f]
2788
2790{
2791 // note, if zero pointers given, use internal pdf
2792 // sanity check first
2793 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2794 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2795 if (!pdfS) pdfS = fSplS;
2796 if (!pdfB) pdfB = fSplB;
2797
2798 if (!fSplS || !fSplB) {
2799 Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2800 << " fSplS or fSplB are not yet filled" << Endl;
2801 return 0;
2802 }else{
2803 return gTools().GetSeparation( *pdfS, *pdfB );
2804 }
2805}
2806
2807////////////////////////////////////////////////////////////////////////////////
2808 /// calculate the area (integral) under the ROC curve as an
2809/// overall quality measure of the classification
2810
2812{
2813 // note, if zero pointers given, use internal pdf
2814 // sanity check first
2815 if ((!histS && histB) || (histS && !histB))
2816 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2817
2818 if (histS==0 || histB==0) return 0.;
2819
2820 TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2821 TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2822
2823
2824 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2825 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2826
2827 Double_t integral = 0;
2828 UInt_t nsteps = 1000;
2829 Double_t step = (xmax-xmin)/Double_t(nsteps);
2830 Double_t cut = xmin;
2831 for (UInt_t i=0; i<nsteps; i++) {
2832 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2833 cut+=step;
2834 }
2835 delete pdfS;
2836 delete pdfB;
2837 return integral*step;
2838}
2839
2840
2841////////////////////////////////////////////////////////////////////////////////
2842 /// calculate the area (integral) under the ROC curve as an
2843/// overall quality measure of the classification
2844
2846{
2847 // note, if zero pointers given, use internal pdf
2848 // sanity check first
2849 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2850 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2851 if (!pdfS) pdfS = fSplS;
2852 if (!pdfB) pdfB = fSplB;
2853
2854 if (pdfS==0 || pdfB==0) return 0.;
2855
2856 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2857 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2858
2859 Double_t integral = 0;
2860 UInt_t nsteps = 1000;
2861 Double_t step = (xmax-xmin)/Double_t(nsteps);
2862 Double_t cut = xmin;
2863 for (UInt_t i=0; i<nsteps; i++) {
2864 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2865 cut+=step;
2866 }
2867 return integral*step;
2868}
2869
2870////////////////////////////////////////////////////////////////////////////////
2871/// plot significance, \f$ \frac{S}{\sqrt{S^2 + B^2}} \f$, curve for given number
2872/// of signal and background events; returns cut for maximum significance
2873/// also returned via reference is the maximum significance
2874
2876                                                 Double_t BackgroundEvents,
2877                                                 Double_t& max_significance_value ) const
2878 {
2879    Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, Types::kMaxAnalysisType );
2880 
2881    Double_t max_significance(0);
2882    Double_t effS(0),effB(0),significance(0);
2883    // NOTE(review): temp_histogram is leaked on the early `return 0` path below
2883    // (missing efficiency histograms) — consider allocating it after that check.
2883    TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2884 
2885    if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2886       Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2887             << "Number of signal or background events is <= 0 ==> abort"
2888             << Endl;
2889    }
2890 
2891    Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2892          << SignalEvents/BackgroundEvents << Endl;
2893 
2894    TH1* eff_s = results->GetHist("MVA_EFF_S");
2895    TH1* eff_b = results->GetHist("MVA_EFF_B");
2896 
2897    if ( (eff_s==0) || (eff_b==0) ) {
2898       Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2899       Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2900       return 0;
2901    }
2902 
2903    // scan all cut bins and compute S/sqrt(S+B) scaled to the requested yields
2903    for (Int_t bin=1; bin<=fNbinsH; bin++) {
2904       effS = eff_s->GetBinContent( bin );
2905       effB = eff_b->GetBinContent( bin );
2906 
2907       // put significance into a histogram
2908       significance = sqrt(SignalEvents)*( effS )/sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
2909 
2910       temp_histogram->SetBinContent(bin,significance);
2911    }
2912 
2913    // find maximum in histogram
2914    max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
2915    max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
2916 
2917    // delete
2918    delete temp_histogram;
2919 
2920    Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at      : " << max_significance << Endl;
2921    Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
2922 
2923    return max_significance;
2924 }
2925
2926////////////////////////////////////////////////////////////////////////////////
2927/// calculates rms,mean, xmin, xmax of the event variable
2928/// this can be either done for the variables as they are or for
2929/// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
2930
2932                                    Double_t& meanS, Double_t& meanB,
2933                                    Double_t& rmsS,  Double_t& rmsB,
2935 {
2936    // remember the current tree type so it can be restored at the end
2936    Types::ETreeType previousTreeType = Data()->GetCurrentType();
2937    Data()->SetCurrentType(treeType);
2938 
2939    Long64_t entries = Data()->GetNEvents();
2940 
2941    // sanity check
2942    if (entries <=0)
2943       Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;
2944 
2945    // index of the wanted variable
2946    UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
2947 
2948    // first fill signal and background in arrays before analysis
2949    xmin = +DBL_MAX;
2950    xmax = -DBL_MAX;
2951    // NOTE(review): nEventsS/nEventsB are initialised, incremented once after the
2951    // loop, and never read — apparently dead code; candidate for removal.
2951    Long64_t nEventsS = -1;
2952    Long64_t nEventsB = -1;
2953 
2954    // take into account event weights
2955    meanS = 0;
2956    meanB = 0;
2957    rmsS = 0;
2958    rmsB = 0;
2959    Double_t sumwS = 0, sumwB = 0;
2960 
2961    // loop over all training events, accumulating weighted first and second moments
2962    for (Int_t ievt = 0; ievt < entries; ievt++) {
2963 
2964       const Event* ev = GetEvent(ievt);
2965 
2966       Double_t theVar = ev->GetValue(varIndex);
2967       Double_t weight = ev->GetWeight();
2968 
2969       if (DataInfo().IsSignal(ev)) {
2970          sumwS += weight;
2971          meanS += weight*theVar;
2972          rmsS += weight*theVar*theVar;
2973       }
2974       else {
2975          sumwB += weight;
2976          meanB += weight*theVar;
2977          rmsB += weight*theVar*theVar;
2978       }
2979       xmin = TMath::Min( xmin, theVar );
2980       xmax = TMath::Max( xmax, theVar );
2981    }
2982    ++nEventsS;
2983    ++nEventsB;
2984 
2985    // weighted mean and RMS; NOTE(review): no guard against sumwS/sumwB == 0
2985    meanS = meanS/sumwS;
2986    meanB = meanB/sumwB;
2987    rmsS = TMath::Sqrt( rmsS/sumwS - meanS*meanS );
2988    rmsB = TMath::Sqrt( rmsB/sumwB - meanB*meanB );
2989 
2990    Data()->SetCurrentType(previousTreeType);
2991 }
2992
2993////////////////////////////////////////////////////////////////////////////////
2994/// create reader class for method (classification only at present)
2995
2996 void TMVA::MethodBase::MakeClass( const TString& theClassFileName ) const
2997 {
2998    // the default file name consists of the weight-file dir, job name and method name
2999    TString classFileName = "";
3000    if (theClassFileName == "")
3001       classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
3002    else
3003       classFileName = theClassFileName;
3004 
3005    TString className = TString("Read") + GetMethodName();
3006 
3007    // NOTE(review): tfname is never used below — candidate for removal
3007    TString tfname( classFileName );
3008    Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
3009       << "Creating standalone class: "
3010          << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
3011 
3012    std::ofstream fout( classFileName );
3013    if (!fout.good()) { // file could not be opened --> Error
3014       Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
3015    }
3016 
3017    // now create the class
3018    // preamble
3019    fout << "// Class: " << className << std::endl;
3020    fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
3021 
3022    // print general information and configuration state
3023    fout << std::endl;
3024    fout << "/* configuration options =====================================================" << std::endl << std::endl;
3025    WriteStateToStream( fout );
3026    fout << std::endl;
3027    fout << "============================================================================ */" << std::endl;
3028 
3029    // generate the class
3030    fout << "" << std::endl;
3031    fout << "#include <array>" << std::endl;
3032    fout << "#include <vector>" << std::endl;
3033    fout << "#include <cmath>" << std::endl;
3034    fout << "#include <string>" << std::endl;
3035    fout << "#include <iostream>" << std::endl;
3036    fout << "" << std::endl;
3037    // now if the classifier needs to write some additional classes for its response implementation
3038    // this code goes here: (at least the header declarations need to come before the main class
3039    this->MakeClassSpecificHeader( fout, className );
3040 
3041    // emit the abstract reader interface the generated class implements
3041    fout << "#ifndef IClassifierReader__def" << std::endl;
3042    fout << "#define IClassifierReader__def" << std::endl;
3043    fout << std::endl;
3044    fout << "class IClassifierReader {" << std::endl;
3045    fout << std::endl;
3046    fout << " public:" << std::endl;
3047    fout << std::endl;
3048    fout << "    // constructor" << std::endl;
3049    fout << "    IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
3050    fout << "    virtual ~IClassifierReader() {}" << std::endl;
3051    fout << std::endl;
3052    fout << "    // return classifier response" << std::endl;
3053    if(GetAnalysisType() == Types::kMulticlass) {
3054       fout << "    virtual std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3055    } else {
3056       fout << "    virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3057    }
3058    fout << std::endl;
3059    fout << "    // returns classifier status" << std::endl;
3060    fout << "    bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3061    fout << std::endl;
3062    fout << " protected:" << std::endl;
3063    fout << std::endl;
3064    fout << "    bool fStatusIsClean;" << std::endl;
3065    fout << "};" << std::endl;
3066    fout << std::endl;
3067    fout << "#endif" << std::endl;
3068    fout << std::endl;
3069    // emit the concrete reader class: constructor validates the input variables
3069    fout << "class " << className << " : public IClassifierReader {" << std::endl;
3070    fout << std::endl;
3071    fout << " public:" << std::endl;
3072    fout << std::endl;
3073    fout << "    // constructor" << std::endl;
3074    fout << "    " << className << "( std::vector<std::string>& theInputVars )" << std::endl;
3075    fout << "       : IClassifierReader()," << std::endl;
3076    fout << "         fClassName( \"" << className << "\" )," << std::endl;
3077    fout << "         fNvars( " << GetNvar() << " )" << std::endl;
3078    fout << "    {" << std::endl;
3079    fout << "       // the training input variables" << std::endl;
3080    fout << "       const char* inputVars[] = { ";
3081    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3082       fout << "\"" << GetOriginalVarName(ivar) << "\"";
3083       if (ivar<GetNvar()-1) fout << ", ";
3084    }
3085    fout << " };" << std::endl;
3086    fout << std::endl;
3087    fout << "       // sanity checks" << std::endl;
3088    fout << "       if (theInputVars.size() <= 0) {" << std::endl;
3089    fout << "          std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3090    fout << "          fStatusIsClean = false;" << std::endl;
3091    fout << "       }" << std::endl;
3092    fout << std::endl;
3093    fout << "       if (theInputVars.size() != fNvars) {" << std::endl;
3094    fout << "          std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3095    fout << "                    << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3096    fout << "          fStatusIsClean = false;" << std::endl;
3097    fout << "       }" << std::endl;
3098    fout << std::endl;
3099    fout << "       // validate input variables" << std::endl;
3100    fout << "       for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3101    fout << "          if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3102    fout << "             std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3103    fout << "                       << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3104    fout << "             fStatusIsClean = false;" << std::endl;
3105    fout << "          }" << std::endl;
3106    fout << "       }" << std::endl;
3107    fout << std::endl;
3108    fout << "       // initialize min and max vectors (for normalisation)" << std::endl;
3109    for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3110       fout << "       fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
3111       fout << "       fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
3112    }
3113    fout << std::endl;
3114    fout << "       // initialize input variable types" << std::endl;
3115    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3116       fout << "       fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3117    }
3118    fout << std::endl;
3119    fout << "       // initialize constants" << std::endl;
3120    fout << "       Initialize();" << std::endl;
3121    fout << std::endl;
3122    if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3123       fout << "       // initialize transformation" << std::endl;
3124       fout << "       InitTransform();" << std::endl;
3125    }
3126    fout << "    }" << std::endl;
3127    fout << std::endl;
3128    fout << "    // destructor" << std::endl;
3129    fout << "    virtual ~" << className << "() {" << std::endl;
3130    fout << "       Clear(); // method-specific" << std::endl;
3131    fout << "    }" << std::endl;
3132    fout << std::endl;
3133    fout << "    // the classifier response" << std::endl;
3134    fout << "    // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
3135    fout << "    // variables given to the constructor" << std::endl;
3136    if(GetAnalysisType() == Types::kMulticlass) {
3137       fout << "    std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const override;" << std::endl;
3138    } else {
3139       fout << "    double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
3140    }
3141    fout << std::endl;
3142    fout << " private:" << std::endl;
3143    fout << std::endl;
3144    fout << "    // method-specific destructor" << std::endl;
3145    fout << "    void Clear();" << std::endl;
3146    fout << std::endl;
3147    if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3148       fout << "    // input variable transformation" << std::endl;
3149       GetTransformationHandler().MakeFunction(fout, className,1);
3150       fout << "    void InitTransform();" << std::endl;
3151       fout << "    void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3152       fout << std::endl;
3153    }
3154    fout << "    // common member variables" << std::endl;
3155    fout << "    const char* fClassName;" << std::endl;
3156    fout << std::endl;
3157    fout << "    const size_t fNvars;" << std::endl;
3158    fout << "    size_t GetNvar()           const { return fNvars; }" << std::endl;
3159    fout << "    char   GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3160    fout << std::endl;
3161    fout << "    // normalisation of input variables" << std::endl;
3162    fout << "    double fVmin[" << GetNvar() << "];" << std::endl;
3163    fout << "    double fVmax[" << GetNvar() << "];" << std::endl;
3164    fout << "    double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3165    fout << "       // normalise to output range: [-1, 1]" << std::endl;
3166    fout << "       return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3167    fout << "    }" << std::endl;
3168    fout << std::endl;
3169    fout << "    // type of input variable: 'F' or 'I'" << std::endl;
3170    fout << "    char   fType[" << GetNvar() << "];" << std::endl;
3171    fout << std::endl;
3172    fout << "    // initialize internal variables" << std::endl;
3173    fout << "    void Initialize();" << std::endl;
3174    if(GetAnalysisType() == Types::kMulticlass) {
3175       fout << "    std::vector<double> GetMulticlassValues__( const std::vector<double>& inputValues ) const;" << std::endl;
3176    } else {
3177       fout << "    double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3178    }
3179    fout << "" << std::endl;
3180    fout << "    // private members (method specific)" << std::endl;
3181 
3182    // call the classifier specific output (the classifier must close the class !)
3183    MakeClassSpecific( fout, className );
3184 
3185    // emit the public entry point, which normalises/transforms the inputs as
3185    // configured before delegating to the method-specific __ implementation
3185    if(GetAnalysisType() == Types::kMulticlass) {
3186       fout << "inline std::vector<double> " << className << "::GetMulticlassValues( const std::vector<double>& inputValues ) const" << std::endl;
3187    } else {
3188       fout << "inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3189    }
3190    fout << "{" << std::endl;
3191    fout << "   // classifier response value" << std::endl;
3192    if(GetAnalysisType() == Types::kMulticlass) {
3193       fout << "   std::vector<double> retval;" << std::endl;
3194    } else {
3195       fout << "   double retval = 0;" << std::endl;
3196    }
3197    fout << std::endl;
3198    fout << "   // classifier response, sanity check first" << std::endl;
3199    fout << "   if (!IsStatusClean()) {" << std::endl;
3200    fout << "      std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3201    fout << "                << \" because status is dirty\" << std::endl;" << std::endl;
3202    fout << "   }" << std::endl;
3203    fout << "   else {" << std::endl;
3204    if (IsNormalised()) {
3205       fout << "      // normalise variables" << std::endl;
3206       fout << "      std::vector<double> iV;" << std::endl;
3207       fout << "      iV.reserve(inputValues.size());" << std::endl;
3208       fout << "      int ivar = 0;" << std::endl;
3209       fout << "      for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3210       fout << "           varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3211       fout << "         iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3212       fout << "      }" << std::endl;
3213       if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3214           GetMethodType() != Types::kHMatrix) {
3215          fout << "      Transform( iV, -1 );" << std::endl;
3216       }
3217 
3218       if(GetAnalysisType() == Types::kMulticlass) {
3219          fout << "      retval = GetMulticlassValues__( iV );" << std::endl;
3220       } else {
3221          fout << "      retval = GetMvaValue__( iV );" << std::endl;
3222       }
3223    } else {
3224       if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3225           GetMethodType() != Types::kHMatrix) {
3226          fout << "      std::vector<double> iV(inputValues);" << std::endl;
3227          fout << "      Transform( iV, -1 );" << std::endl;
3228          if(GetAnalysisType() == Types::kMulticlass) {
3229             fout << "      retval = GetMulticlassValues__( iV );" << std::endl;
3230          } else {
3231             fout << "      retval = GetMvaValue__( iV );" << std::endl;
3232          }
3233       } else {
3234          if(GetAnalysisType() == Types::kMulticlass) {
3235             fout << "      retval = GetMulticlassValues__( inputValues );" << std::endl;
3236          } else {
3237             fout << "      retval = GetMvaValue__( inputValues );" << std::endl;
3238          }
3239       }
3240    }
3241    fout << "   }" << std::endl;
3242    fout << std::endl;
3243    fout << "   return retval;" << std::endl;
3244    fout << "}" << std::endl;
3245 
3246    // create output for transformation - if any
3247    if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3248       GetTransformationHandler().MakeFunction(fout, className,2);
3249 
3250    // close the file
3251    fout.close();
3252 }
3253
3254////////////////////////////////////////////////////////////////////////////////
3255/// prints out method-specific help method
3256
3258{
3259 // if options are written to reference file, also append help info
3260 std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3261 std::ofstream* o = 0;
3262 if (gConfig().WriteOptionsReference()) {
3263 Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3264 o = new std::ofstream( GetReferenceFile(), std::ios::app );
3265 if (!o->good()) { // file could not be opened --> Error
3266 Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3267 }
3268 std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3269 }
3270
3271 // "|--------------------------------------------------------------|"
3272 if (!o) {
3273 Log() << kINFO << Endl;
3274 Log() << gTools().Color("bold")
3275 << "================================================================"
3276 << gTools().Color( "reset" )
3277 << Endl;
3278 Log() << gTools().Color("bold")
3279 << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3280 << gTools().Color( "reset" )
3281 << Endl;
3282 }
3283 else {
3284 Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3285 }
3286
3287 // print method-specific help message
3288 GetHelpMessage();
3289
3290 if (!o) {
3291 Log() << Endl;
3292 Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3293 Log() << gTools().Color("bold")
3294 << "================================================================"
3295 << gTools().Color( "reset" )
3296 << Endl;
3297 Log() << Endl;
3298 }
3299 else {
3300 // indicate END
3301 Log() << "# End of Message___" << Endl;
3302 }
3303
3304 std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3305 if (o) o->close();
3306}
3307
3308// ----------------------- r o o t f i n d i n g ----------------------------
3309
3310////////////////////////////////////////////////////////////////////////////////
3311/// returns efficiency as function of cut
3312
3314{
3315 Double_t retval=0;
3316
3317 // retrieve the class object
3319 retval = fSplRefS->Eval( theCut );
3320 }
3321 else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3322
3323 // caution: here we take some "forbidden" action to hide a problem:
3324 // in some cases, in particular for likelihood, the binned efficiency distributions
3325 // do not equal 1, at xmin, and 0 at xmax; of course, in principle we have the
3326 // unbinned information available in the trees, but the unbinned minimization is
3327 // too slow, and we don't need to do a precision measurement here. Hence, we force
3328 // this property.
3329 Double_t eps = 1.0e-5;
3330 if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3331 else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3332
3333 return retval;
3334}
3335
3336////////////////////////////////////////////////////////////////////////////////
3337/// returns the event collection (i.e. the dataset) TRANSFORMED using the
3338/// classifiers specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3339
3341{
3342 // if there's no variable transformation for this classifier, just hand back the
3343 // event collection of the data set
3344 if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3345 return (Data()->GetEventCollection(type));
3346 }
3347
3348 // otherwise, transform ALL the events and hand back the vector of the pointers to the
3349 // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3350 // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3351 Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3352 if (fEventCollections.at(idx) == 0) {
3353 fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3354 fEventCollections.at(idx) = GetTransformationHandler().CalcTransformations(*(fEventCollections.at(idx)),kTRUE);
3355 }
3356 return *(fEventCollections.at(idx));
3357}
3358
3359////////////////////////////////////////////////////////////////////////////////
3360/// calculates the TMVA version string from the training version code on the fly
3361
3363{
3364 UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3365 UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3366 UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3367
3368 return TString(Form("%i.%i.%i",a,b,c));
3369}
3370
3371////////////////////////////////////////////////////////////////////////////////
3372/// calculates the ROOT version string from the training version code on the fly
3373
3375{
3376 UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3377 UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3378 UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3379
3380 return TString(Form("%i.%02i/%02i",a,b,c));
3381}
3382
3383////////////////////////////////////////////////////////////////////////////////
3384
3386 ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
3387 ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
3388
3389 if (mvaRes != NULL) {
3390 TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3391 TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3392 TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3393 TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3394
3395 if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3396
3397 if (SorB == 's' || SorB == 'S')
3398 return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3399 else
3400 return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3401 }
3402 return -1;
3403}
const Bool_t Use_Splines_for_Eff_
const Int_t NBIN_HIST_HIGH
ROOT::R::TRInterface & r
Definition Object.C:4
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
#define s1(x)
Definition RSha256.hxx:91
#define ROOT_VERSION_CODE
Definition RVersion.h:21
int Int_t
Definition RtypesCore.h:45
char Char_t
Definition RtypesCore.h:37
const Bool_t kFALSE
Definition RtypesCore.h:92
bool Bool_t
Definition RtypesCore.h:63
double Double_t
Definition RtypesCore.h:59
long long Long64_t
Definition RtypesCore.h:73
float Float_t
Definition RtypesCore.h:57
const Bool_t kTRUE
Definition RtypesCore.h:91
#define ClassImp(name)
Definition Rtypes.h:364
char name[80]
Definition TGX11.cxx:110
int type
Definition TGX11.cxx:121
float xmin
float xmax
double sqrt(double)
TMatrixT< Double_t > TMatrixD
Definition TMatrixDfwd.h:23
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
Definition TSystem.h:559
#define TMVA_VERSION_CODE
Definition Version.h:47
Class to manage histogram axis.
Definition TAxis.h:30
Double_t GetXmax() const
Definition TAxis.h:134
Double_t GetXmin() const
Definition TAxis.h:133
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write all objects in this collection.
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130...
Definition TDatime.h:37
const char * AsString() const
Return the date & time as a string (ctime() format).
Definition TDatime.cxx:102
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.
Describe directory structure in memory.
Definition TDirectory.h:45
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
virtual Bool_t cd(const char *path=nullptr)
Change current directory to "this" directory.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:54
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3997
void Close(Option_t *option="") override
Close a file.
Definition TFile.cxx:879
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)}
Definition TH1.h:618
1-D histogram with a float per channel (see TH1 documentation)}
Definition TH1.h:575
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:58
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition TH1.cxx:8981
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum=0)
Compute Quantiles for this histogram Quantile x_q of a probability distribution Function F is defined...
Definition TH1.cxx:4543
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
Definition TH1.cxx:1257
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition TH1.cxx:7428
virtual void SetXTitle(const char *title)
Definition TH1.h:413
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition TH1.cxx:1282
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition TH1.h:320
virtual Double_t GetMaximum(Double_t maxval=FLT_MAX) const
Return maximum value smaller than maxval of bins in the range, unless the value has been overridden b...
Definition TH1.cxx:8390
virtual Int_t GetNbinsX() const
Definition TH1.h:296
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition TH1.cxx:3350
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition TH1.cxx:9062
virtual Int_t GetMaximumBin() const
Return location of bin with maximum value in the range.
Definition TH1.cxx:8420
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition TH1.cxx:4993
virtual void SetYTitle(const char *title)
Definition TH1.h:414
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition TH1.cxx:6564
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition TH1.cxx:3680
virtual Double_t KolmogorovTest(const TH1 *h2, Option_t *option="") const
Statistical test of compatibility in shape between this histogram and h2, using Kolmogorov test.
Definition TH1.cxx:8068
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
Definition TH1.cxx:8860
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition TH1.cxx:750
2-D histogram with a float per channel (see TH1 documentation)}
Definition TH2.h:251
Int_t Fill(Double_t)
Invalid Fill method.
Definition TH2.cxx:294
A doubly linked list.
Definition TList.h:44
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition TList.cxx:357
Class that contains all the information of a class.
Definition ClassInfo.h:49
UInt_t GetNumber() const
Definition ClassInfo.h:65
TString fWeightFileExtension
Definition Config.h:127
VariablePlotting & GetVariablePlotting()
Definition Config.h:99
class TMVA::Config::VariablePlotting fVariablePlotting
IONames & GetIONames()
Definition Config.h:100
MsgLogger * fLogger
Class that contains all the data information.
Definition DataSetInfo.h:62
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition Event.cxx:236
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition Event.cxx:381
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:391
Float_t GetTarget(UInt_t itgt) const
Definition Event.h:102
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:400
Interface for all concrete MVA method implementations.
Definition IMethod.h:53
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
IPythonInteractive()
standard constructor
~IPythonInteractive()
standard destructor
void ClearGraphs()
This function sets the point number to 0 for all graphs.
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Virtual base Class for all MVA method.
Definition MethodBase.h:111
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
void SetWeightFileDir(TString fileDir)
set directory of weight file
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables,...
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual Double_t GetSignificance() const
compute significance of mean difference
virtual Double_t GetProba(const Event *ev)
const char * GetName() const
Definition MethodBase.h:333
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
void PrintHelpMessage() const
prints out method-specific help method
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
void SetupMethod()
setup of methods
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
virtual ~MethodBase()
destructor
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance, , curve for given number of signal and background events; returns cut for maximum ...
virtual Double_t GetTrainingEfficiency(const TString &)
void SetWeightFileName(TString)
set the weight file name (depreciated)
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
TString GetWeightFileName() const
retrieve weight file name
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file dummy implementation here --------------—
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void InitBase()
default initialization called by all constructors
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
void ReadStateFromFile()
Function to write options and weights to file.
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream.
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
void SetTestvarName(const TString &v="")
Definition MethodBase.h:340
void ReadVariablesFromXML(void *varnode)
read variable info from XML
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables,...
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity:
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument This is just a dummy .
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
void AddVarsXMLTo(void *parent) const
write variable info to XML
void AddTargetsXMLTo(void *parent) const
write target info to XML
void ReadTargetsFromXML(void *tarnode)
read target info from XML
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
void ReadStateFromXML(void *parent)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
void SetSource(const std::string &source)
Definition MsgLogger.h:70
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
Double_t GetXmin() const
Definition PDF.h:104
Double_t GetXmax() const
Definition PDF.h:105
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition PDF.cxx:701
@ kSpline3
Definition PDF.h:70
@ kSpline2
Definition PDF.h:70
Double_t GetIntegral(Double_t xmin, Double_t xmax)
computes PDF integral within given ranges
Definition PDF.cxx:654
Class that is the base-class for a vector of result.
std::vector< Float_t > * GetValueVector()
void SetValue(Float_t value, Int_t ievt, Bool_t type)
set MVA response
Class which takes the results of a multiclass classification.
TMatrixD GetConfusionMatrix(Double_t effB)
Returns a confusion matrix where each class is pitted against each other.
Float_t GetAchievablePur(UInt_t cls)
std::vector< Double_t > GetBestMultiClassCuts(UInt_t targetClass)
calculate the best working point (optimal cut values) for the multiclass classifier
void CreateMulticlassHistos(TString prefix, Int_t nbins, Int_t nbins_high)
this function fills the mva response histos for multiclass classification
Float_t GetAchievableEff(UInt_t cls)
void CreateMulticlassPerformanceHistos(TString prefix)
Create performance graphs for this classifier a multiclass setting.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Definition Results.h:57
Bool_t DoesExist(const TString &alias) const
Returns true if there is an object stored in the result for a given alias, false otherwise.
Definition Results.cxx:127
TH1 * GetHist(const TString &alias) const
Definition Results.cxx:136
TList * GetStorage() const
Definition Results.h:73
void Store(TObject *obj, const char *alias=0)
Definition Results.cxx:86
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition RootFinder.h:48
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Linear interpolation of TGraph.
Definition TSpline1.h:43
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
Double_t ElapsedSeconds(void)
computes elapsed time in seconds
Definition Timer.cxx:137
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition Timer.cxx:146
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition Timer.cxx:202
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition Tools.cxx:214
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition Tools.cxx:413
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition Tools.cxx:1174
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as
Definition Tools.cxx:133
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition Tools.cxx:1136
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition Tools.cxx:601
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:840
void * GetChild(void *parent, const char *childname=0)
get child node
Definition Tools.cxx:1162
TXMLEngine & xmlengine()
Definition Tools.h:268
Bool_t CheckSplines(const TH1 *, const TSpline *)
check quality of splining by comparing splines and histograms in each bin
Definition Tools.cxx:491
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:335
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:353
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition Tools.cxx:395
Singleton class for Global types used by TMVA.
Definition Types.h:73
@ kBackground
Definition Types.h:138
@ kLikelihood
Definition Types.h:81
@ kHMatrix
Definition Types.h:83
@ kMulticlass
Definition Types.h:131
@ kNoAnalysisType
Definition Types.h:132
@ kClassification
Definition Types.h:129
@ kMaxAnalysisType
Definition Types.h:133
@ kRegression
Definition Types.h:130
@ kTraining
Definition Types.h:145
Linear interpolation class.
Gaussian Transformation of input variables.
Class for type info of MVA input variable.
void ReadFromXML(void *varnode)
read VariableInfo from stream
const TString & GetExpression() const
char GetVarType() const
void ReadFromStream(std::istream &istr)
read VariableInfo from stream
void AddToXML(void *varnode)
write class to XML
void SetExternalLink(void *p)
void * GetExternalLink() const
void BuildTransformationFromVarInfo(const std::vector< TMVA::VariableInfo > &var)
this method is only used when building a normalization transformation from old text files in this cas...
Linear interpolation class.
Linear interpolation class.
virtual void ReadTransformationFromStream(std::istream &istr, const TString &classname="")=0
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition TMultiGraph.h:36
virtual const char * GetName() const
Returns name of object.
Definition TNamed.h:47
Collectable string class.
Definition TObjString.h:28
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition TObject.cxx:798
Basic string class.
Definition TString.h:136
Ssiz_t Length() const
Definition TString.h:410
void ToLower()
Change string to lower-case.
Definition TString.cxx:1145
Int_t Atoi() const
Return integer value of string.
Definition TString.cxx:1941
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
Definition TString.cxx:2197
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition TString.cxx:1126
const char * Data() const
Definition TString.h:369
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition TString.h:692
@ kLeading
Definition TString.h:267
Ssiz_t Last(char c) const
Find last occurrence of a character c.
Definition TString.cxx:912
Bool_t IsNull() const
Definition TString.h:407
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2331
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:639
virtual const char * GetBuildNode() const
Return the build node name.
Definition TSystem.cxx:3890
virtual int mkdir(const char *name, Bool_t recursive=kFALSE)
Make a file system directory.
Definition TSystem.cxx:905
virtual const char * WorkingDirectory()
Return working directory.
Definition TSystem.cxx:870
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition TSystem.cxx:1597
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file if layout<=0, no any spaces or newlines will be placed between xmlnode...
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
TLine * line
Double_t x[n]
Definition legend1.C:17
TH1F * h1
Definition legend1.C:5
Config & gConfig()
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:158
Short_t Max(Short_t a, Short_t b)
Definition TMathBase.h:212
Double_t Sqrt(Double_t x)
Definition TMath.h:691
Short_t Min(Short_t a, Short_t b)
Definition TMathBase.h:180
Short_t Abs(Short_t d)
Definition TMathBase.h:120
TString fUser
Definition TSystem.h:141