Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
Factory.cxx
Go to the documentation of this file.
1// @(#)Root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3// Updated by: Omar Zapata, Kim Albertsson
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : Factory *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors : *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
16 * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
17 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21 * Omar Zapata <Omar.Zapata@cern.ch> - UdeA/ITM Colombia *
22 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
23 * Sergei Gleyzer <Sergei.Gleyzer@cern.ch> - U of Florida & CERN *
24 * Kim Albertsson <kim.albertsson@cern.ch> - LTU & CERN *
25 * *
26 * Copyright (c) 2005-2015: *
27 * CERN, Switzerland *
28 * U. of Victoria, Canada *
29 * MPI-K Heidelberg, Germany *
30 * U. of Bonn, Germany *
31 * UdeA/ITM, Colombia *
32 * U. of Florida, USA *
33 * *
34 * Redistribution and use in source and binary forms, with or without *
35 * modification, are permitted according to the terms listed in LICENSE *
36 * (see tmva/doc/LICENSE) *
37 **********************************************************************************/
38
39/*! \class TMVA::Factory
40\ingroup TMVA
41
42This is the main MVA steering class.
43It creates all MVA methods, and guides them through the training, testing and
44evaluation phases.
45*/
46
47#include "TMVA/Factory.h"
48
50#include "TMVA/Config.h"
51#include "TMVA/Configurable.h"
52#include "TMVA/Tools.h"
53#include "TMVA/Ranking.h"
54#include "TMVA/DataSet.h"
55#include "TMVA/IMethod.h"
56#include "TMVA/MethodBase.h"
58#include "TMVA/DataSetManager.h"
59#include "TMVA/DataSetInfo.h"
60#include "TMVA/DataLoader.h"
61#include "TMVA/MethodBoost.h"
62#include "TMVA/MethodCategory.h"
63#include "TMVA/ROCCalc.h"
64#include "TMVA/ROCCurve.h"
65#include "TMVA/MsgLogger.h"
66
67#include "TMVA/VariableInfo.h"
69
70#include "TMVA/Results.h"
74#include <list>
75#include <bitset>
76#include <set>
77
78#include "TMVA/Types.h"
79
80#include "TROOT.h"
81#include "TFile.h"
82#include "TLeaf.h"
83#include "TEventList.h"
84#include "TH2.h"
85#include "TGraph.h"
86#include "TStyle.h"
87#include "TMatrixF.h"
88#include "TMatrixDSym.h"
89#include "TMultiGraph.h"
90#include "TPrincipal.h"
91#include "TMath.h"
92#include "TSystem.h"
93#include "TCanvas.h"
94
96// const Int_t MinNoTestEvents = 1;
97
98
99#define READXML kTRUE
100
101// number of bits for bitset
102#define VIBITS 32
103
104////////////////////////////////////////////////////////////////////////////////
105/// Standard constructor.
106///
107/// - jobname : this name will appear in all weight file names produced by the MVAs
108/// - theTargetFile : output ROOT file; the test tree and all evaluation plots
109/// will be stored here
110/// - theOption : option string; currently: "V" for verbose
111
113 : Configurable(theOption), fTransformations("I"), fVerbose(kFALSE), fVerboseLevel(kINFO), fCorrelations(kFALSE),
114 fROC(kTRUE), fSilentFile(theTargetFile == nullptr), fJobName(jobName), fAnalysisType(Types::kClassification),
115 fModelPersistence(kTRUE)
116{
117 fName = "Factory";
120
121 // render silent
122 if (gTools().CheckForSilentOption(GetOptions()))
123 Log().InhibitOutput(); // make sure is silent if wanted to
124
125 // init configurable
126 SetConfigDescription("Configuration options for Factory running");
128
129 // histograms are not automatically associated with the current
130 // directory and hence don't go out of scope when closing the file
131 // TH1::AddDirectory(kFALSE);
133#ifdef WIN32
134 // under Windows, switch progress bar and color off by default, as the typical windows shell doesn't handle these
135 // (would need different sequences..)
136 Bool_t color = kFALSE;
138#else
139 Bool_t color = !gROOT->IsBatch();
141#endif
142 DeclareOptionRef(fVerbose, "V", "Verbose flag");
143 DeclareOptionRef(fVerboseLevel = TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)");
144 AddPreDefVal(TString("Debug"));
145 AddPreDefVal(TString("Verbose"));
146 AddPreDefVal(TString("Info"));
147 DeclareOptionRef(color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)");
149 fTransformations, "Transformations",
150 "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, "
151 "decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations");
152 DeclareOptionRef(fCorrelations, "Correlations", "boolean to show correlation in output");
153 DeclareOptionRef(fROC, "ROC", "boolean to show ROC in output");
154 DeclareOptionRef(silent, "Silent",
155 "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory "
156 "class object (default: False)");
157 DeclareOptionRef(drawProgressBar, "DrawProgressBar",
158 "Draw progress bar to display training, testing and evaluation schedule (default: True)");
159 DeclareOptionRef(fModelPersistence, "ModelPersistence",
160 "Option to save the trained model in xml file or using serialization");
161
162 TString analysisType("Auto");
163 DeclareOptionRef(analysisType, "AnalysisType",
164 "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)");
165 AddPreDefVal(TString("Classification"));
166 AddPreDefVal(TString("Regression"));
167 AddPreDefVal(TString("Multiclass"));
168 AddPreDefVal(TString("Auto"));
169
170 ParseOptions();
172
173 if (Verbose())
174 fLogger->SetMinType(kVERBOSE);
175 if (fVerboseLevel.CompareTo("Debug") == 0)
176 fLogger->SetMinType(kDEBUG);
177 if (fVerboseLevel.CompareTo("Verbose") == 0)
178 fLogger->SetMinType(kVERBOSE);
179 if (fVerboseLevel.CompareTo("Info") == 0)
180 fLogger->SetMinType(kINFO);
181
182 // global settings
183 gConfig().SetUseColor(color);
186
187 analysisType.ToLower();
188 if (analysisType == "classification")
190 else if (analysisType == "regression")
192 else if (analysisType == "multiclass")
194 else if (analysisType == "auto")
196
197 // Greetings();
198}
199
200////////////////////////////////////////////////////////////////////////////////
201/// Constructor.
202
204 : Configurable(theOption), fTransformations("I"), fVerbose(kFALSE), fCorrelations(kFALSE), fROC(kTRUE),
205 fSilentFile(kTRUE), fJobName(jobName), fAnalysisType(Types::kClassification), fModelPersistence(kTRUE)
206{
207 fName = "Factory";
208 fgTargetFile = nullptr;
210
211 // render silent
212 if (gTools().CheckForSilentOption(GetOptions()))
213 Log().InhibitOutput(); // make sure is silent if wanted to
214
215 // init configurable
216 SetConfigDescription("Configuration options for Factory running");
218
219 // histograms are not automatically associated with the current
220 // directory and hence don't go out of scope when closing the file
223#ifdef WIN32
224 // under Windows, switch progress bar and color off by default, as the typical windows shell doesn't handle these
225 // (would need different sequences..)
226 Bool_t color = kFALSE;
228#else
229 Bool_t color = !gROOT->IsBatch();
231#endif
232 DeclareOptionRef(fVerbose, "V", "Verbose flag");
233 DeclareOptionRef(fVerboseLevel = TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)");
234 AddPreDefVal(TString("Debug"));
235 AddPreDefVal(TString("Verbose"));
236 AddPreDefVal(TString("Info"));
237 DeclareOptionRef(color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)");
239 fTransformations, "Transformations",
240 "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, "
241 "decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations");
242 DeclareOptionRef(fCorrelations, "Correlations", "boolean to show correlation in output");
243 DeclareOptionRef(fROC, "ROC", "boolean to show ROC in output");
244 DeclareOptionRef(silent, "Silent",
245 "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory "
246 "class object (default: False)");
247 DeclareOptionRef(drawProgressBar, "DrawProgressBar",
248 "Draw progress bar to display training, testing and evaluation schedule (default: True)");
249 DeclareOptionRef(fModelPersistence, "ModelPersistence",
250 "Option to save the trained model in xml file or using serialization");
251
252 TString analysisType("Auto");
253 DeclareOptionRef(analysisType, "AnalysisType",
254 "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)");
255 AddPreDefVal(TString("Classification"));
256 AddPreDefVal(TString("Regression"));
257 AddPreDefVal(TString("Multiclass"));
258 AddPreDefVal(TString("Auto"));
259
260 ParseOptions();
262
263 if (Verbose())
264 fLogger->SetMinType(kVERBOSE);
265 if (fVerboseLevel.CompareTo("Debug") == 0)
266 fLogger->SetMinType(kDEBUG);
267 if (fVerboseLevel.CompareTo("Verbose") == 0)
268 fLogger->SetMinType(kVERBOSE);
269 if (fVerboseLevel.CompareTo("Info") == 0)
270 fLogger->SetMinType(kINFO);
271
272 // global settings
273 gConfig().SetUseColor(color);
276
277 analysisType.ToLower();
278 if (analysisType == "classification")
280 else if (analysisType == "regression")
282 else if (analysisType == "multiclass")
284 else if (analysisType == "auto")
286
287 Greetings();
288}
289
290////////////////////////////////////////////////////////////////////////////////
291/// Print welcome message.
292/// Options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg
293
295{
296 gTools().ROOTVersionMessage(Log());
297 gTools().TMVAWelcomeMessage(Log(), gTools().kLogoWelcomeMsg);
298 gTools().TMVAVersionMessage(Log());
299 Log() << Endl;
300}
301
302////////////////////////////////////////////////////////////////////////////////
303/// Destructor.
304
306{
307 std::vector<TMVA::VariableTransformBase *>::iterator trfIt = fDefaultTrfs.begin();
308 for (; trfIt != fDefaultTrfs.end(); ++trfIt)
309 delete (*trfIt);
310
311 this->DeleteAllMethods();
312
313 // problem with call of REGISTER_METHOD macro ...
314 // ClassifierFactory::DestroyInstance();
315 // Types::DestroyInstance();
316 // Tools::DestroyInstance();
317 // Config::DestroyInstance();
318}
319
320////////////////////////////////////////////////////////////////////////////////
321/// Delete methods.
322
324{
325 std::map<TString, MVector *>::iterator itrMap;
326
327 for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
328 MVector *methods = itrMap->second;
329 // delete methods
330 MVector::iterator itrMethod = methods->begin();
331 for (; itrMethod != methods->end(); ++itrMethod) {
332 Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
333 delete (*itrMethod);
334 }
335 methods->clear();
336 delete methods;
337 }
338}
339
340////////////////////////////////////////////////////////////////////////////////
341
343{
344 fVerbose = v;
345}
346
347////////////////////////////////////////////////////////////////////////////////
348/// Books an MVA classifier or regression method. The option configuration
349/// string is custom for each MVA. The TString field `theNameAppendix` serves to
350/// define (and distinguish) several instances of a given MVA, e.g., when one
351/// wants to compare the performance of various configurations
352///
353/// The method is identified by `theMethodName`, which can be provided either as:
354/// - a string containing the method's name, or
355/// - a `TMVA::Types::EMVA` enum value (automatically converted to the corresponding method name).
356
358 TString methodTitle, TString theOption)
359{
360 if (fModelPersistence)
361 gSystem->MakeDirectory(loader->GetName()); // creating directory for DataLoader output
362
363 TString datasetname = loader->GetName();
364
365 if (fAnalysisType == Types::kNoAnalysisType) {
366 if (loader->GetDataSetInfo().GetNClasses() == 2 && loader->GetDataSetInfo().GetClassInfo("Signal") != NULL &&
367 loader->GetDataSetInfo().GetClassInfo("Background") != NULL) {
368 fAnalysisType = Types::kClassification; // default is classification
369 } else if (loader->GetDataSetInfo().GetNClasses() >= 2) {
370 fAnalysisType = Types::kMulticlass; // if two classes, but not named "Signal" and "Background"
371 } else
372 Log() << kFATAL << "No analysis type for " << loader->GetDataSetInfo().GetNClasses() << " classes and "
373 << loader->GetDataSetInfo().GetNTargets() << " regression targets." << Endl;
374 }
375
376 // booking via name; the names are translated into enums and the
377 // corresponding overloaded BookMethod is called
378
379 if (fMethodsMap.find(datasetname) != fMethodsMap.end()) {
380 if (GetMethod(datasetname, methodTitle) != 0) {
381 Log() << kFATAL << "Booking failed since method with title <" << methodTitle << "> already exists "
382 << "in with DataSet Name <" << loader->GetName() << "> " << Endl;
383 }
384 }
385
386 Log() << kHEADER << "Booking method: " << gTools().Color("bold")
387 << methodTitle
388 // << gTools().Color("reset")<<" DataSet Name: "<<gTools().Color("bold")<<loader->GetName()
389 << gTools().Color("reset") << Endl << Endl;
390
391 // interpret option string with respect to a request for boosting (i.e., BostNum > 0)
392 Int_t boostNum = 0;
394 conf->DeclareOptionRef(boostNum = 0, "Boost_num", "Number of times the classifier will be boosted");
395 conf->ParseOptions();
396 delete conf;
397 // this is name of weight file directory
399 if (fModelPersistence) {
400 // find prefix in fWeightFileDir;
402 fileDir = prefix;
403 if (!prefix.IsNull())
404 if (fileDir[fileDir.Length() - 1] != '/')
405 fileDir += "/";
406 fileDir += loader->GetName();
408 }
409 // initialize methods
410 IMethod *im;
411 if (!boostNum) {
412 im = ClassifierFactory::Instance().Create(theMethodName.tString().Data(), fJobName, methodTitle, loader->GetDataSetInfo(),
413 theOption);
414 } else {
415 // boosted classifier, requires a specific definition, making it transparent for the user
416 Log() << kDEBUG << "Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
417 im = ClassifierFactory::Instance().Create("Boost", fJobName, methodTitle, loader->GetDataSetInfo(), theOption);
418 MethodBoost *methBoost = dynamic_cast<MethodBoost *>(im); // DSMTEST divided into two lines
419 if (!methBoost) { // DSMTEST
420 Log() << kFATAL << "Method with type kBoost cannot be casted to MethodCategory. /Factory" << Endl; // DSMTEST
421 return nullptr;
422 }
423 if (fModelPersistence)
424 methBoost->SetWeightFileDir(fileDir);
425 methBoost->SetModelPersistence(fModelPersistence);
426 methBoost->SetBoostedMethodName(theMethodName.tString()); // DSMTEST divided into two lines
427 methBoost->fDataSetManager = loader->GetDataSetInfo().GetDataSetManager(); // DSMTEST
428 methBoost->SetFile(fgTargetFile);
429 methBoost->SetSilentFile(IsSilentFile());
430 }
431
432 MethodBase *method = dynamic_cast<MethodBase *>(im);
433 if (method == 0)
434 return 0; // could not create method
435
436 // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
437 if (method->GetMethodType() == Types::kCategory) { // DSMTEST
438 MethodCategory *methCat = (dynamic_cast<MethodCategory *>(im)); // DSMTEST
439 if (!methCat) { // DSMTEST
440 Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Factory"
441 << Endl; // DSMTEST
442 return nullptr;
443 }
444 if (fModelPersistence)
445 methCat->SetWeightFileDir(fileDir);
446 methCat->SetModelPersistence(fModelPersistence);
447 methCat->fDataSetManager = loader->GetDataSetInfo().GetDataSetManager(); // DSMTEST
448 methCat->SetFile(fgTargetFile);
449 methCat->SetSilentFile(IsSilentFile());
450 } // DSMTEST
451
452 if (!method->HasAnalysisType(fAnalysisType, loader->GetDataSetInfo().GetNClasses(),
453 loader->GetDataSetInfo().GetNTargets())) {
454 Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling ";
455 if (fAnalysisType == Types::kRegression) {
456 Log() << "regression with " << loader->GetDataSetInfo().GetNTargets() << " targets." << Endl;
457 } else if (fAnalysisType == Types::kMulticlass) {
458 Log() << "multiclass classification with " << loader->GetDataSetInfo().GetNClasses() << " classes." << Endl;
459 } else {
460 Log() << "classification with " << loader->GetDataSetInfo().GetNClasses() << " classes." << Endl;
461 }
462 return 0;
463 }
464
465 if (fModelPersistence)
466 method->SetWeightFileDir(fileDir);
467 method->SetModelPersistence(fModelPersistence);
468 method->SetAnalysisType(fAnalysisType);
469 method->SetupMethod();
470 method->ParseOptions();
471 method->ProcessSetup();
472 method->SetFile(fgTargetFile);
473 method->SetSilentFile(IsSilentFile());
474
475 // check-for-unused-options is performed; may be overridden by derived classes
476 method->CheckSetup();
477
478 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
479 MVector *mvector = new MVector;
480 fMethodsMap[datasetname] = mvector;
481 }
482 fMethodsMap[datasetname]->push_back(method);
483 return method;
484}
485
486////////////////////////////////////////////////////////////////////////////////
487/// Adds an already constructed method to be managed by this factory.
488///
489/// \note Private.
490/// \note Know what you are doing when using this method. The method that you
491/// are loading could be trained already.
492///
493
496{
497 TString datasetname = loader->GetName();
498 std::string methodTypeName = std::string(Types::Instance().GetMethodName(methodType).Data());
499 DataSetInfo &dsi = loader->GetDataSetInfo();
500
502 MethodBase *method = (dynamic_cast<MethodBase *>(im));
503
504 if (method == nullptr)
505 return nullptr;
506
507 if (method->GetMethodType() == Types::kCategory) {
508 Log() << kERROR << "Cannot handle category methods for now." << Endl;
509 }
510
512 if (fModelPersistence) {
513 // find prefix in fWeightFileDir;
515 fileDir = prefix;
516 if (!prefix.IsNull())
517 if (fileDir[fileDir.Length() - 1] != '/')
518 fileDir += "/";
519 fileDir = loader->GetName();
521 }
522
523 if (fModelPersistence)
524 method->SetWeightFileDir(fileDir);
525 method->SetModelPersistence(fModelPersistence);
526 method->SetAnalysisType(fAnalysisType);
527 method->SetupMethod();
528 method->SetFile(fgTargetFile);
529 method->SetSilentFile(IsSilentFile());
530
531 method->DeclareCompatibilityOptions();
532
533 // read weight file
534 method->ReadStateFromFile();
535
536 // method->CheckSetup();
537
538 TString methodTitle = method->GetName();
539 if (HasMethod(datasetname, methodTitle) != 0) {
540 Log() << kFATAL << "Booking failed since method with title <" << methodTitle << "> already exists "
541 << "in with DataSet Name <" << loader->GetName() << "> " << Endl;
542 }
543
544 Log() << kINFO << "Booked classifier \"" << method->GetMethodName() << "\" of type: \""
545 << method->GetMethodTypeName() << "\"" << Endl;
546
547 if (fMethodsMap.count(datasetname) == 0) {
548 MVector *mvector = new MVector;
549 fMethodsMap[datasetname] = mvector;
550 }
551
552 fMethodsMap[datasetname]->push_back(method);
553
554 return method;
555}
556
557////////////////////////////////////////////////////////////////////////////////
558/// Returns pointer to MVA that corresponds to given method title.
559
561{
562 if (fMethodsMap.find(datasetname) == fMethodsMap.end())
563 return 0;
564
565 MVector *methods = fMethodsMap.find(datasetname)->second;
566
567 MVector::const_iterator itrMethod;
568 //
569 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
570 MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
571 if ((mva->GetMethodName()) == methodTitle)
572 return mva;
573 }
574 return 0;
575}
576
577////////////////////////////////////////////////////////////////////////////////
578/// Checks whether a given method name is defined for a given dataset.
579
581{
582 if (fMethodsMap.find(datasetname) == fMethodsMap.end())
583 return 0;
584
585 std::string methodName = methodTitle.Data();
586 auto isEqualToMethodName = [&methodName](TMVA::IMethod *m) { return (0 == methodName.compare(m->GetName())); };
587
588 TMVA::Factory::MVector *methods = this->fMethodsMap.at(datasetname);
590
592}
593
594////////////////////////////////////////////////////////////////////////////////
595
597{
598 RootBaseDir()->cd();
599
600 if (!RootBaseDir()->GetDirectory(fDataSetInfo.GetName()))
601 RootBaseDir()->mkdir(fDataSetInfo.GetName());
602 else
603 return; // loader is now in the output file, we dont need to save again
604
605 RootBaseDir()->cd(fDataSetInfo.GetName());
606 fDataSetInfo.GetDataSet(); // builds dataset (including calculation of correlation matrix)
607
608 // correlation matrix of the default DS
609 const TMatrixD *m(0);
610 const TH2 *h(0);
611
612 if (fAnalysisType == Types::kMulticlass) {
613 for (UInt_t cls = 0; cls < fDataSetInfo.GetNClasses(); cls++) {
614 m = fDataSetInfo.CorrelationMatrix(fDataSetInfo.GetClassInfo(cls)->GetName());
615 h = fDataSetInfo.CreateCorrelationMatrixHist(
616 m, TString("CorrelationMatrix") + fDataSetInfo.GetClassInfo(cls)->GetName(),
617 TString("Correlation Matrix (") + fDataSetInfo.GetClassInfo(cls)->GetName() + TString(")"));
618 if (h != 0) {
619 h->Write();
620 delete h;
621 }
622 }
623 } else {
624 m = fDataSetInfo.CorrelationMatrix("Signal");
625 h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixS", "Correlation Matrix (signal)");
626 if (h != 0) {
627 h->Write();
628 delete h;
629 }
630
631 m = fDataSetInfo.CorrelationMatrix("Background");
632 h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixB", "Correlation Matrix (background)");
633 if (h != 0) {
634 h->Write();
635 delete h;
636 }
637
638 m = fDataSetInfo.CorrelationMatrix("Regression");
639 h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrix", "Correlation Matrix");
640 if (h != 0) {
641 h->Write();
642 delete h;
643 }
644 }
645
646 // some default transformations to evaluate
647 // NOTE: all transformations are destroyed after this test
648 TString processTrfs = "I"; //"I;N;D;P;U;G,D;"
649
650 // plus some user defined transformations
651 processTrfs = fTransformations;
652
653 // remove any trace of identity transform - if given (avoid to apply it twice)
654 std::vector<TMVA::TransformationHandler *> trfs;
656
657 std::vector<TString> trfsDef = gTools().SplitString(processTrfs, ';');
658 std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
659 for (; trfsDefIt != trfsDef.end(); ++trfsDefIt) {
660 trfs.push_back(new TMVA::TransformationHandler(fDataSetInfo, "Factory"));
661 TString trfS = (*trfsDefIt);
662
663 // Log() << kINFO << Endl;
664 Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
665 TMVA::CreateVariableTransforms(trfS, fDataSetInfo, *(trfs.back()), Log());
666
667 if (trfS.BeginsWith('I'))
668 identityTrHandler = trfs.back();
669 }
670
671 const std::vector<Event *> &inputEvents = fDataSetInfo.GetDataSet()->GetEventCollection();
672
673 // apply all transformations
674 std::vector<TMVA::TransformationHandler *>::iterator trfIt = trfs.begin();
675
676 for (; trfIt != trfs.end(); ++trfIt) {
677 // setting a Root dir causes the variables distributions to be saved to the root file
678 (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.GetName())); // every dataloader have its own dir
679 (*trfIt)->CalcTransformations(inputEvents);
680 }
682 identityTrHandler->PrintVariableRanking();
683
684 // clean up
685 for (trfIt = trfs.begin(); trfIt != trfs.end(); ++trfIt)
686 delete *trfIt;
687}
688
689////////////////////////////////////////////////////////////////////////////////
690/// Iterates through all booked methods and sees if they use parameter tuning and if so
691/// does just that, i.e.\ calls "Method::Train()" for different parameter settings and
692/// keeps in mind the "optimal one"...\ and that's the one that will later on be used
693/// in the main training loop.
694
696{
697
698 std::map<TString, MVector *>::iterator itrMap;
699 std::map<TString, Double_t> TunedParameters;
700 for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
701 MVector *methods = itrMap->second;
702
703 MVector::iterator itrMethod;
704
705 // iterate over methods and optimize
706 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
708 MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
709 if (!mva) {
710 Log() << kFATAL << "Dynamic cast to MethodBase failed" << Endl;
711 return TunedParameters;
712 }
713
714 if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
715 Log() << kWARNING << "Method " << mva->GetMethodName() << " not trained (training tree has less entries ["
716 << mva->Data()->GetNTrainingEvents() << "] than required [" << MinNoTrainingEvents << "]" << Endl;
717 continue;
718 }
719
720 Log() << kINFO << "Optimize method: " << mva->GetMethodName() << " for "
721 << (fAnalysisType == Types::kRegression
722 ? "Regression"
723 : (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
724 << Endl;
725
726 TunedParameters = mva->OptimizeTuningParameters(fomType, fitType);
727 Log() << kINFO << "Optimization of tuning parameters finished for Method:" << mva->GetName() << Endl;
728 }
729 }
730
731 return TunedParameters;
732}
733
734////////////////////////////////////////////////////////////////////////////////
735/// Private method to generate a ROCCurve instance for a given method.
736/// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
737/// understands.
738///
739/// \note You own the returned pointer.
740///
741
747
748////////////////////////////////////////////////////////////////////////////////
749/// Private method to generate a ROCCurve instance for a given method.
750/// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
751/// understands.
752///
753/// \note You own the returned pointer.
754///
755
757{
758 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
759 Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
760 return nullptr;
761 }
762
763 if (!this->HasMethod(datasetname, theMethodName)) {
764 Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
765 << Endl;
766 return nullptr;
767 }
768
769 std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
770 if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
771 Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.")
772 << Endl;
773 return nullptr;
774 }
775
776 TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(this->GetMethod(datasetname, theMethodName));
777 TMVA::DataSet *dataset = method->Data();
778 dataset->SetCurrentType(type);
779 TMVA::Results *results = dataset->GetResults(theMethodName, type, this->fAnalysisType);
780
781 UInt_t nClasses = method->DataInfo().GetNClasses();
782 if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
783 Log() << kERROR
784 << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass,
785 nClasses)
786 << Endl;
787 return nullptr;
788 }
789
790 TMVA::ROCCurve *rocCurve = nullptr;
791 if (this->fAnalysisType == Types::kClassification) {
792
793 std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
794 std::vector<Bool_t> *mvaResTypes = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
795 std::vector<Float_t> mvaResWeights;
796
797 auto eventCollection = dataset->GetEventCollection(type);
798 mvaResWeights.reserve(eventCollection.size());
799 for (auto ev : eventCollection) {
800 mvaResWeights.push_back(ev->GetWeight());
801 }
802
804
805 } else if (this->fAnalysisType == Types::kMulticlass) {
806 std::vector<Float_t> mvaRes;
807 std::vector<Bool_t> mvaResTypes;
808 std::vector<Float_t> mvaResWeights;
809
810 std::vector<std::vector<Float_t>> *rawMvaRes = dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();
811
812 // Vector transpose due to values being stored as
813 // [ [0, 1, 2], [0, 1, 2], ... ]
814 // in ResultsMulticlass::GetValueVector.
815 mvaRes.reserve(rawMvaRes->size());
816 for (auto item : *rawMvaRes) {
817 mvaRes.push_back(item[iClass]);
818 }
819
820 auto eventCollection = dataset->GetEventCollection(type);
821 mvaResTypes.reserve(eventCollection.size());
822 mvaResWeights.reserve(eventCollection.size());
823 for (auto ev : eventCollection) {
824 mvaResTypes.push_back(ev->GetClass() == iClass);
825 mvaResWeights.push_back(ev->GetWeight());
826 }
827
829 }
830
831 return rocCurve;
832}
833
834////////////////////////////////////////////////////////////////////////////////
835/// Calculate the integral of the ROC curve, also known as the area under curve
836/// (AUC), for a given method.
837///
838/// Argument iClass specifies the class to generate the ROC curve in a
839/// multiclass setting. It is ignored for binary classification.
840///
841
847
848////////////////////////////////////////////////////////////////////////////////
849/// Calculate the integral of the ROC curve, also known as the area under curve
850/// (AUC), for a given method.
851///
852/// Argument iClass specifies the class to generate the ROC curve in a
853/// multiclass setting. It is ignored for binary classification.
854///
855
857{
858 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
859 Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
860 return 0;
861 }
862
863 if (!this->HasMethod(datasetname, theMethodName)) {
864 Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
865 << Endl;
866 return 0;
867 }
868
869 std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
870 if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
871 Log() << kERROR << Form("Can only generate ROC integral for analysis type kClassification. and kMulticlass.")
872 << Endl;
873 return 0;
874 }
875
877 if (!rocCurve) {
878 Log() << kFATAL
879 << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.Data(),
880 datasetname.Data())
881 << Endl;
882 return 0;
883 }
884
885 Int_t npoints = TMVA::gConfig().fVariablePlotting.fNbinsXOfROCCurve + 1;
886 Double_t rocIntegral = rocCurve->GetROCIntegral(npoints);
887 delete rocCurve;
888
889 return rocIntegral;
890}
891
892////////////////////////////////////////////////////////////////////////////////
893/// Argument iClass specifies the class to generate the ROC curve in a
894/// multiclass setting. It is ignored for binary classification.
895///
896/// Returns a ROC graph for a given method, or nullptr on error.
897///
898/// Note: Evaluation of the given method must have been run prior to ROC
899/// generation through Factory::EvaluateAllMethods.
900///
901/// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
902/// and the others considered background. This is ok in binary classification
903/// but in multi-class classification, the ROC surface is an N-dimensional
904/// shape, where N is number of classes - 1.
905
911
912////////////////////////////////////////////////////////////////////////////////
913/// Argument iClass specifies the class to generate the ROC curve in a
914/// multiclass setting. It is ignored for binary classification.
915///
916/// Returns a ROC graph for a given method, or nullptr on error.
917///
918/// Note: Evaluation of the given method must have been run prior to ROC
919/// generation through Factory::EvaluateAllMethods.
920///
921/// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
922/// and the others considered background. This is ok in binary classification
923/// but in multi-class classification, the ROC surface is an N-dimensional
924/// shape, where N is number of classes - 1.
925
928{
   // Guard: the requested dataset must have been booked with this factory.
929   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
930      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
931      return nullptr;
932   }
933
   // Guard: the named method must exist for that dataset.
934   if (!this->HasMethod(datasetname, theMethodName)) {
935      Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
936            << Endl;
937      return nullptr;
938   }
939
   // ROC curves are only defined for classification-type analyses
   // (binary or multiclass).
940   std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
941   if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
942      Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.")
943            << Endl;
944      return nullptr;
945   }
946
   // NOTE(review): the statement that creates `rocCurve` (original line 947)
   // is not visible in this excerpt — presumably a GetROC(...) call; confirm
   // against the full source.
948   TGraph *graph = nullptr;
949
950   if (!rocCurve) {
951      Log() << kFATAL
952            << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.Data(),
953                    datasetname.Data())
954            << Endl;
955      return nullptr;
956   }
957
   // Clone the graph so it survives the deletion of the ROCCurve owner below;
   // the caller takes ownership of the returned clone.
958   graph = (TGraph *)rocCurve->GetROCCurve()->Clone();
959   delete rocCurve;
960
   // Optionally decorate the graph with standard axis/plot titles.
961   if (setTitles) {
962      graph->GetYaxis()->SetTitle("Background rejection (Specificity)");
963      graph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
964      graph->SetTitle(TString::Format("Signal efficiency vs. Background rejection (%s)", theMethodName.Data()).Data());
965   }
966
967   return graph;
968}
969
970////////////////////////////////////////////////////////////////////////////////
971/// Generate a collection of graphs, for all methods for a given class. Suitable
972/// for comparing method performance.
973///
974/// Argument iClass specifies the class to generate the ROC curve in a
975/// multiclass setting. It is ignored for binary classification.
976///
977/// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
978/// and the others considered background. This is ok in binary classification
979/// but in multi-class classification, the ROC surface is an N-dimensional
980/// shape, where N is number of classes - 1.
981
986
987////////////////////////////////////////////////////////////////////////////////
988/// Generate a collection of graphs, for all methods for a given class. Suitable
989/// for comparing method performance.
990///
991/// Argument iClass specifies the class to generate the ROC curve in a
992/// multiclass setting. It is ignored for binary classification.
993///
994/// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
995/// and the others considered background. This is ok in binary classification
996/// but in multi-class classification, the ROC surface is an N-dimensional
997/// shape, where N is number of classes - 1.
998
1000{
   // Cycle through ROOT line colors so each method's curve is distinguishable.
1001   UInt_t line_color = 1;
1002
   // NOTE(review): the statement that creates `multigraph` (original line
   // 1003, presumably `new TMultiGraph()`) is not visible in this excerpt.
1004
1005   MVector *methods = fMethodsMap[datasetname.Data()];
1006   for (auto *method_raw : *methods) {
      // NOTE(review): the cast producing `method` from `method_raw` (original
      // line 1007, presumably a dynamic_cast to MethodBase*) is not visible
      // in this excerpt.
1008      if (method == nullptr) {
1009         continue;
1010      }
1011
1012      TString methodName = method->GetMethodName();
1013      UInt_t nClasses = method->DataInfo().GetNClasses();
1014
      // In multiclass mode the requested class index must exist; skip the
      // method (with an error) rather than aborting the whole multigraph.
1015      if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
1016         Log() << kERROR
1017               << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass,
1018                       nClasses)
1019               << Endl;
1020         continue;
1021      }
1022
1023      TString className = method->DataInfo().GetClassInfo(iClass)->GetName();
1024
      // Per-method ROC graph without titles; the multigraph supplies them.
1025      TGraph *graph = this->GetROCCurve(datasetname, methodName, false, iClass, type);
1026      graph->SetTitle(methodName);
1027
1028      graph->SetLineWidth(2);
1029      graph->SetLineColor(line_color++);
1030      graph->SetFillColor(10);
1031
      // Ownership of `graph` passes to the multigraph here.
1032      multigraph->Add(graph);
1033   }
1034
   // If every method was skipped above, there is nothing to return.
1035   if (multigraph->GetListOfGraphs() == nullptr) {
1036      Log() << kERROR << Form("No metohds have class %i defined.", iClass) << Endl;
1037      return nullptr;
1038   }
1039
1040   return multigraph;
1041}
1042
1043////////////////////////////////////////////////////////////////////////////////
1044/// Draws ROC curves for all methods booked with the factory for a given class
1045/// onto a canvas.
1046///
1047/// Argument iClass specifies the class to generate the ROC curve in a
1048/// multiclass setting. It is ignored for binary classification.
1049///
1050/// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
1051/// and the others considered background. This is ok in binary classification
1052/// but in multi-class classification, the ROC surface is an N-dimensional
1053/// shape, where N is number of classes - 1.
1054
1059
1060////////////////////////////////////////////////////////////////////////////////
1061/// Draws ROC curves for all methods booked with the factory for a given class.
1062///
1063/// Argument iClass specifies the class to generate the ROC curve in a
1064/// multiclass setting. It is ignored for binary classification.
1065///
1066/// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
1067/// and the others considered background. This is ok in binary classification
1068/// but in multi-class classification, the ROC surface is an N-dimensional
1069/// shape, where N is number of classes - 1.
1070
1072{
   // Guard: the requested dataset must have been booked with this factory.
1073   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
1074      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
1075      return 0;
1076   }
1077
   // Create the canvas up front; it is returned (never deleted here), so the
   // caller takes ownership.
1078   TString name = TString::Format("ROCCurve %s class %i", datasetname.Data(), iClass);
1079   TCanvas *canvas = new TCanvas(name.Data(), "ROC Curve", 200, 10, 700, 500);
1080   canvas->SetGrid();
1081
1082   TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass, type);
1083
   // GetROCCurveAsMultiGraph returns nullptr on error, in which case an empty
   // canvas is returned.
1084   if (multigraph) {
1085      multigraph->Draw("AL");
1086
1087      multigraph->GetYaxis()->SetTitle("Background rejection (Specificity)");
1088      multigraph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
1089
1090      TString titleString = TString::Format("Signal efficiency vs. Background rejection");
1091      if (this->fAnalysisType == Types::kMulticlass) {
1092         titleString = TString::Format("%s (Class=%i)", titleString.Data(), iClass);
1093      }
1094
1095      // Workaround for TMultigraph not drawing title correctly.
1096      multigraph->GetHistogram()->SetTitle(titleString.Data());
1097      multigraph->SetTitle(titleString.Data());
1098
1099      canvas->BuildLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
1100   }
1101
1102   return canvas;
1103}
1104
1105////////////////////////////////////////////////////////////////////////////////
1106/// Iterates through all booked methods and calls training
1107
1109{
   // Train every booked method of every booked dataset, rank input variables
   // (classification only), save the training history, and — when model
   // persistence is on — destroy each method and recreate it from its weight
   // file so that later testing matches standalone application exactly.
1110   Log() << kHEADER << gTools().Color("bold") << "Train all methods" << gTools().Color("reset") << Endl;
1111   // iterates over all MVAs that have been booked, and calls their training methods
1112
1113   // don't do anything if no method booked
1114   if (fMethodsMap.empty()) {
1115      Log() << kINFO << "...nothing found to train" << Endl;
1116      return;
1117   }
1118
1119   // here the training starts
1120   // Log() << kINFO << " " << Endl;
1121   Log() << kDEBUG << "Train all methods for "
1122         << (fAnalysisType == Types::kRegression
1123                ? "Regression"
1124                : (fAnalysisType == Types::kMulticlass ? "Multiclass" : "Classification"))
1125         << " ..." << Endl;
1126
1127   std::map<TString, MVector *>::iterator itrMap;
1128
1129   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
1130      MVector *methods = itrMap->second;
1131      MVector::iterator itrMethod;
1132
1133      // iterate over methods and train
1134      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         // NOTE(review): original line 1135 is not visible in this excerpt.
1136         MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
1137
         // Skip entries that are not MethodBase instances.
1138         if (mva == 0)
1139            continue;
1140
1141         if (mva->DataInfo().GetDataSetManager()->DataInput().GetEntries() <=
1142             1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
1143            Log() << kFATAL << "No input data for the training provided!" << Endl;
1144         }
1145
         // Sanity checks on the analysis type vs. the dataset definition;
         // kFATAL aborts via the logger.
1146         if (fAnalysisType == Types::kRegression && mva->DataInfo().GetNTargets() < 1)
1147            Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
1148         else if ((fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification) &&
1149                  mva->DataInfo().GetNClasses() < 2)
1150            Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
1151
1152         // first print some information about the default dataset
1153         if (!IsSilentFile())
1154            WriteDataInformation(mva->fDataSetInfo);
1155
         // Methods with too few training events are skipped, not trained.
1156         if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
1157            Log() << kWARNING << "Method " << mva->GetMethodName() << " not trained (training tree has less entries ["
1158                  << mva->Data()->GetNTrainingEvents() << "] than required [" << MinNoTrainingEvents << "]" << Endl;
1159            continue;
1160         }
1161
1162         Log() << kHEADER << "Train method: " << mva->GetMethodName() << " for "
1163               << (fAnalysisType == Types::kRegression
1164                      ? "Regression"
1165                      : (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
1166               << Endl << Endl;
1167         mva->TrainMethod();
1168         Log() << kHEADER << "Training finished" << Endl << Endl;
1169      }
1170
1171      if (fAnalysisType != Types::kRegression) {
1172
1173         // variable ranking
1174         // Log() << Endl;
1175         Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
1176         for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1177            MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
            // Only rank methods that actually got trained (see threshold above).
1178            if (mva && mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) {
1179
1180               // create and print ranking
1181               const Ranking *ranking = (*itrMethod)->CreateRanking();
1182               if (ranking != 0)
1183                  ranking->Print();
1184               else
1185                  Log() << kINFO << "No variable ranking supplied by classifier: "
1186                        << dynamic_cast<MethodBase *>(*itrMethod)->GetMethodName() << Endl;
1187            }
1188         }
1189      }
1190
1191      // save training history in case we are not in the silent mode
1192      if (!IsSilentFile()) {
1193         for (UInt_t i = 0; i < methods->size(); i++) {
1194            MethodBase *m = dynamic_cast<MethodBase *>((*methods)[i]);
1195            if (m == 0)
1196               continue;
1197            m->BaseDir()->cd();
1198            m->fTrainHistory.SaveHistory(m->GetMethodName());
1199         }
1200      }
1201
1202      // delete all methods and recreate them from weight file - this ensures that the application
1203      // of the methods (in TMVAClassificationApplication) is consistent with the results obtained
1204      // in the testing
1205      // Log() << Endl;
1206      if (fModelPersistence) {
1207
1208         Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;
1209
1210         if (!IsSilentFile())
1211            RootBaseDir()->cd();
1212
1213         // iterate through all booked methods
1214         for (UInt_t i = 0; i < methods->size(); i++) {
1215
1216            MethodBase *m = dynamic_cast<MethodBase *>((*methods)[i]);
1217            if (m == nullptr)
1218               continue;
1219
            // Capture everything needed to rebuild the method before deleting it.
1220            TMVA::Types::EMVA methodType = m->GetMethodType();
1221            TString weightfile = m->GetWeightFileName();
1222
1223            // decide if .txt or .xml file should be read:
1224            if (READXML)
1225               weightfile.ReplaceAll(".txt", ".xml");
1226
1227            DataSetInfo &dataSetInfo = m->DataInfo();
1228            TString testvarName = m->GetTestvarName();
1229            delete m; // itrMethod[i];
1230
1231            // recreate
1232            m = dynamic_cast<MethodBase *>(ClassifierFactory::Instance().Create(
1233               Types::Instance().GetMethodName(methodType).Data(), dataSetInfo, weightfile));
1234            if (m->GetMethodType() == Types::kCategory) {
1235               MethodCategory *methCat = (dynamic_cast<MethodCategory *>(m));
1236               if (!methCat)
1237                  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Factory" << Endl;
1238               else
                  // Category sub-methods need the dataset manager re-wired by hand.
1239                  methCat->fDataSetManager = m->DataInfo().GetDataSetManager();
1240            }
1241            // ToDo, Do we need to fill the DataSetManager of MethodBoost here too?
1242
1243            TString wfileDir = m->DataInfo().GetName();
            // NOTE(review): original line 1244 (presumably appending the
            // configured weight-file subdirectory to `wfileDir`) is not
            // visible in this excerpt.
1245            m->SetWeightFileDir(wfileDir);
1246            m->SetModelPersistence(fModelPersistence);
1247            m->SetSilentFile(IsSilentFile());
1248            m->SetAnalysisType(fAnalysisType);
1249            m->SetupMethod();
1250            m->ReadStateFromFile();
1251            m->SetTestvarName(testvarName);
1252
1253            // replace trained method by newly created one (from weight file) in methods vector
1254            (*methods)[i] = m;
1255         }
1256      }
1257   }
1258}
1259
1260////////////////////////////////////////////////////////////////////////////////
1261/// Evaluates all booked methods on the testing data and adds the output to the
1262/// Results in the corresponding DataSet.
1263///
1264
1266{
1267   Log() << kHEADER << gTools().Color("bold") << "Test all methods" << gTools().Color("reset") << Endl;
1268
1269   // don't do anything if no method booked
1270   if (fMethodsMap.empty()) {
1271      Log() << kINFO << "...nothing found to test" << Endl;
1272      return;
1273   }
1274   std::map<TString, MVector *>::iterator itrMap;
1275
   // Loop over every dataset and every method booked for it.
1276   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
1277      MVector *methods = itrMap->second;
1278      MVector::iterator itrMethod;
1279
1280      // iterate over methods and test
1281      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         // NOTE(review): original line 1282 is not visible in this excerpt.
1283         MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
         // Skip entries that are not MethodBase instances.
1284         if (mva == 0)
1285            continue;
1286         Types::EAnalysisType analysisType = mva->GetAnalysisType();
1287         Log() << kHEADER << "Test method: " << mva->GetMethodName() << " for "
1288               << (analysisType == Types::kRegression
1289                      ? "Regression"
1290                      : (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
1291               << " performance" << Endl << Endl;
         // Fill the method's test-set output into the dataset's Results store.
1292         mva->AddOutput(Types::kTesting, analysisType);
1293      }
1294   }
1295}
1296
1297////////////////////////////////////////////////////////////////////////////////
1298
1299void TMVA::Factory::MakeClass(const TString &datasetname, const TString &methodTitle) const
1300{
1301 if (methodTitle != "") {
1302 IMethod *method = GetMethod(datasetname, methodTitle);
1303 if (method)
1304 method->MakeClass();
1305 else {
1306 Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle << "\" in list" << Endl;
1307 }
1308 } else {
1309
1310 // no classifier specified, print all help messages
1311 MVector *methods = fMethodsMap.find(datasetname)->second;
1312 MVector::const_iterator itrMethod;
1313 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1314 MethodBase *method = dynamic_cast<MethodBase *>(*itrMethod);
1315 if (method == 0)
1316 continue;
1317 Log() << kINFO << "Make response class for classifier: " << method->GetMethodName() << Endl;
1318 method->MakeClass();
1319 }
1320 }
1321}
1322
1323////////////////////////////////////////////////////////////////////////////////
1324/// Print predefined help message of classifier.
1325/// Iterate over methods and test.
1326
1327void TMVA::Factory::PrintHelpMessage(const TString &datasetname, const TString &methodTitle) const
1328{
1329 if (methodTitle != "") {
1330 IMethod *method = GetMethod(datasetname, methodTitle);
1331 if (method)
1332 method->PrintHelpMessage();
1333 else {
1334 Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle << "\" in list" << Endl;
1335 }
1336 } else {
1337
1338 // no classifier specified, print all help messages
1339 MVector *methods = fMethodsMap.find(datasetname)->second;
1340 MVector::const_iterator itrMethod;
1341 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1342 MethodBase *method = dynamic_cast<MethodBase *>(*itrMethod);
1343 if (method == 0)
1344 continue;
1345 Log() << kINFO << "Print help message for classifier: " << method->GetMethodName() << Endl;
1346 method->PrintHelpMessage();
1347 }
1348 }
1349}
1350
1351////////////////////////////////////////////////////////////////////////////////
1352/// Iterates over all MVA input variables and evaluates them.
1353
1355{
1356   Log() << kINFO << "Evaluating all variables..." << Endl;
   // NOTE(review): original line 1357 is not visible in this excerpt.
1358
   // Book one "Variable" pseudo-method per input variable of the loader's
   // dataset; appending ":V" (when the "V" option is set) is forwarded as the
   // method's option string.
1359   for (UInt_t i = 0; i < loader->GetDataSetInfo().GetNVariables(); i++) {
1360      TString s = loader->GetDataSetInfo().GetVariableInfo(i).GetLabel();
1361      if (options.Contains("V"))
1362         s += ":V";
1363      this->BookMethod(loader, "Variable", s);
1364   }
1365}
1366
1367////////////////////////////////////////////////////////////////////////////////
1368/// Iterates over all MVAs that have been booked, and calls their evaluation methods.
1369
1371{
1372 Log() << kHEADER << gTools().Color("bold") << "Evaluate all methods" << gTools().Color("reset") << Endl;
1373
1374 // don't do anything if no method booked
1375 if (fMethodsMap.empty()) {
1376 Log() << kINFO << "...nothing found to evaluate" << Endl;
1377 return;
1378 }
1379 std::map<TString, MVector *>::iterator itrMap;
1380
1381 for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
1382 MVector *methods = itrMap->second;
1383
1384 // -----------------------------------------------------------------------
1385 // First part of evaluation process
1386 // --> compute efficiencies, and other separation estimators
1387 // -----------------------------------------------------------------------
1388
1389 // although equal, we now want to separate the output for the variables
1390 // and the real methods
1391 Int_t isel; // will be 0 for a Method; 1 for a Variable
1392 Int_t nmeth_used[2] = {0, 0}; // 0 Method; 1 Variable
1393
1394 std::vector<std::vector<TString>> mname(2);
1395 std::vector<std::vector<Double_t>> sig(2), sep(2), roc(2);
1396 std::vector<std::vector<Double_t>> eff01(2), eff10(2), eff30(2), effArea(2);
1397 std::vector<std::vector<Double_t>> eff01err(2), eff10err(2), eff30err(2);
1398 std::vector<std::vector<Double_t>> trainEff01(2), trainEff10(2), trainEff30(2);
1399
1400 std::vector<std::vector<Float_t>> multiclass_testEff;
1401 std::vector<std::vector<Float_t>> multiclass_trainEff;
1402 std::vector<std::vector<Float_t>> multiclass_testPur;
1403 std::vector<std::vector<Float_t>> multiclass_trainPur;
1404
1405 std::vector<std::vector<Float_t>> train_history;
1406
1407 // Multiclass confusion matrices.
1408 std::vector<TMatrixD> multiclass_trainConfusionEffB01;
1409 std::vector<TMatrixD> multiclass_trainConfusionEffB10;
1410 std::vector<TMatrixD> multiclass_trainConfusionEffB30;
1411 std::vector<TMatrixD> multiclass_testConfusionEffB01;
1412 std::vector<TMatrixD> multiclass_testConfusionEffB10;
1413 std::vector<TMatrixD> multiclass_testConfusionEffB30;
1414
1415 std::vector<std::vector<Double_t>> biastrain(1); // "bias" of the regression on the training data
1416 std::vector<std::vector<Double_t>> biastest(1); // "bias" of the regression on test data
1417 std::vector<std::vector<Double_t>> devtrain(1); // "dev" of the regression on the training data
1418 std::vector<std::vector<Double_t>> devtest(1); // "dev" of the regression on test data
1419 std::vector<std::vector<Double_t>> rmstrain(1); // "rms" of the regression on the training data
1420 std::vector<std::vector<Double_t>> rmstest(1); // "rms" of the regression on test data
1421 std::vector<std::vector<Double_t>> minftrain(1); // "minf" of the regression on the training data
1422 std::vector<std::vector<Double_t>> minftest(1); // "minf" of the regression on test data
1423 std::vector<std::vector<Double_t>> rhotrain(1); // correlation of the regression on the training data
1424 std::vector<std::vector<Double_t>> rhotest(1); // correlation of the regression on test data
1425
1426 // same as above but for 'truncated' quantities (computed for events within 2sigma of RMS)
1427 std::vector<std::vector<Double_t>> biastrainT(1);
1428 std::vector<std::vector<Double_t>> biastestT(1);
1429 std::vector<std::vector<Double_t>> devtrainT(1);
1430 std::vector<std::vector<Double_t>> devtestT(1);
1431 std::vector<std::vector<Double_t>> rmstrainT(1);
1432 std::vector<std::vector<Double_t>> rmstestT(1);
1433 std::vector<std::vector<Double_t>> minftrainT(1);
1434 std::vector<std::vector<Double_t>> minftestT(1);
1435
1436 // following vector contains all methods - with the exception of Cuts, which are special
1438
1441
1442 // iterate over methods and evaluate
1443 for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1445 MethodBase *theMethod = dynamic_cast<MethodBase *>(*itrMethod);
1446 if (theMethod == 0)
1447 continue;
1448 theMethod->SetFile(fgTargetFile);
1449 theMethod->SetSilentFile(IsSilentFile());
1450 if (theMethod->GetMethodType() != Types::kCuts)
1451 methodsNoCuts.push_back(*itrMethod);
1452
1453 if (theMethod->DoRegression()) {
1455
1456 Log() << kINFO << "Evaluate regression method: " << theMethod->GetMethodName() << Endl;
1459 Double_t rho;
1460
1461 Log() << kINFO << "TestRegression (testing)" << Endl;
1462 theMethod->TestRegression(bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTesting);
1463 biastest[0].push_back(bias);
1464 devtest[0].push_back(dev);
1465 rmstest[0].push_back(rms);
1466 minftest[0].push_back(mInf);
1467 rhotest[0].push_back(rho);
1468 biastestT[0].push_back(biasT);
1469 devtestT[0].push_back(devT);
1470 rmstestT[0].push_back(rmsT);
1471 minftestT[0].push_back(mInfT);
1472
1473 Log() << kINFO << "TestRegression (training)" << Endl;
1474 theMethod->TestRegression(bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTraining);
1475 biastrain[0].push_back(bias);
1476 devtrain[0].push_back(dev);
1477 rmstrain[0].push_back(rms);
1478 minftrain[0].push_back(mInf);
1479 rhotrain[0].push_back(rho);
1480 biastrainT[0].push_back(biasT);
1481 devtrainT[0].push_back(devT);
1482 rmstrainT[0].push_back(rmsT);
1483 minftrainT[0].push_back(mInfT);
1484
1485 mname[0].push_back(theMethod->GetMethodName());
1486 nmeth_used[0]++;
1487 if (!IsSilentFile()) {
1488 Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1489 theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1490 theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1491 }
1492 } else if (theMethod->DoMulticlass()) {
1493 // ====================================================================
1494 // === Multiclass evaluation
1495 // ====================================================================
1497 Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl;
1498
1499 // This part uses a genetic alg. to evaluate the optimal sig eff * sig pur.
1500 // This is why it is disabled for now.
1501 // Find approximate optimal working point w.r.t. signalEfficiency * signalPurity.
1502 // theMethod->TestMulticlass(); // This is where the actual GA calc is done
1503 // multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur));
1504
1505 theMethod->TestMulticlass();
1506
1507 // Confusion matrix at three background efficiency levels
1508 multiclass_trainConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTraining));
1509 multiclass_trainConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTraining));
1510 multiclass_trainConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTraining));
1511
1512 multiclass_testConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTesting));
1513 multiclass_testConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTesting));
1514 multiclass_testConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTesting));
1515
1516 if (!IsSilentFile()) {
1517 Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1518 theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1519 theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1520 }
1521
1522 nmeth_used[0]++;
1523 mname[0].push_back(theMethod->GetMethodName());
1524 } else {
1525
1526 Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl;
1527 isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0;
1528
1529 // perform the evaluation
1530 theMethod->TestClassification();
1531
1532 // evaluate the classifier
1533 mname[isel].push_back(theMethod->GetMethodName());
1534 sig[isel].push_back(theMethod->GetSignificance());
1535 sep[isel].push_back(theMethod->GetSeparation());
1536 roc[isel].push_back(theMethod->GetROCIntegral());
1537
1538 Double_t err;
1539 eff01[isel].push_back(theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err));
1540 eff01err[isel].push_back(err);
1541 eff10[isel].push_back(theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err));
1542 eff10err[isel].push_back(err);
1543 eff30[isel].push_back(theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err));
1544 eff30err[isel].push_back(err);
1545 effArea[isel].push_back(theMethod->GetEfficiency("", Types::kTesting, err)); // computes the area (average)
1546
1547 trainEff01[isel].push_back(
1548 theMethod->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer
1549 trainEff10[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.10"));
1550 trainEff30[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.30"));
1551
1552 nmeth_used[isel]++;
1553
1554 if (!IsSilentFile()) {
1555 Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1556 theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1557 theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1558 }
1559 }
1560 }
1561 if (doRegression) {
1562
1563 std::vector<TString> vtemps = mname[0];
1564 std::vector<std::vector<Double_t>> vtmp;
1565 vtmp.push_back(devtest[0]); // this is the vector that is ranked
1566 vtmp.push_back(devtrain[0]);
1567 vtmp.push_back(biastest[0]);
1568 vtmp.push_back(biastrain[0]);
1569 vtmp.push_back(rmstest[0]);
1570 vtmp.push_back(rmstrain[0]);
1571 vtmp.push_back(minftest[0]);
1572 vtmp.push_back(minftrain[0]);
1573 vtmp.push_back(rhotest[0]);
1574 vtmp.push_back(rhotrain[0]);
1575 vtmp.push_back(devtestT[0]); // this is the vector that is ranked
1576 vtmp.push_back(devtrainT[0]);
1577 vtmp.push_back(biastestT[0]);
1578 vtmp.push_back(biastrainT[0]);
1579 vtmp.push_back(rmstestT[0]);
1580 vtmp.push_back(rmstrainT[0]);
1581 vtmp.push_back(minftestT[0]);
1582 vtmp.push_back(minftrainT[0]);
1584 mname[0] = vtemps;
1585 devtest[0] = vtmp[0];
1586 devtrain[0] = vtmp[1];
1587 biastest[0] = vtmp[2];
1588 biastrain[0] = vtmp[3];
1589 rmstest[0] = vtmp[4];
1590 rmstrain[0] = vtmp[5];
1591 minftest[0] = vtmp[6];
1592 minftrain[0] = vtmp[7];
1593 rhotest[0] = vtmp[8];
1594 rhotrain[0] = vtmp[9];
1595 devtestT[0] = vtmp[10];
1596 devtrainT[0] = vtmp[11];
1597 biastestT[0] = vtmp[12];
1598 biastrainT[0] = vtmp[13];
1599 rmstestT[0] = vtmp[14];
1600 rmstrainT[0] = vtmp[15];
1601 minftestT[0] = vtmp[16];
1602 minftrainT[0] = vtmp[17];
1603 } else if (doMulticlass) {
1604 // TODO: fill in something meaningful
1605 // If there is some ranking of methods to be done it should be done here.
1606 // However, this is not so easy to define for multiclass so it is left out for now.
1607
1608 } else {
1609 // now sort the variables according to the best 'eff at Beff=0.10'
1610 for (Int_t k = 0; k < 2; k++) {
1611 std::vector<std::vector<Double_t>> vtemp;
1612 vtemp.push_back(effArea[k]); // this is the vector that is ranked
1613 vtemp.push_back(eff10[k]);
1614 vtemp.push_back(eff01[k]);
1615 vtemp.push_back(eff30[k]);
1616 vtemp.push_back(eff10err[k]);
1617 vtemp.push_back(eff01err[k]);
1618 vtemp.push_back(eff30err[k]);
1619 vtemp.push_back(trainEff10[k]);
1620 vtemp.push_back(trainEff01[k]);
1621 vtemp.push_back(trainEff30[k]);
1622 vtemp.push_back(sig[k]);
1623 vtemp.push_back(sep[k]);
1624 vtemp.push_back(roc[k]);
1625 std::vector<TString> vtemps = mname[k];
1627 effArea[k] = vtemp[0];
1628 eff10[k] = vtemp[1];
1629 eff01[k] = vtemp[2];
1630 eff30[k] = vtemp[3];
1631 eff10err[k] = vtemp[4];
1632 eff01err[k] = vtemp[5];
1633 eff30err[k] = vtemp[6];
1634 trainEff10[k] = vtemp[7];
1635 trainEff01[k] = vtemp[8];
1636 trainEff30[k] = vtemp[9];
1637 sig[k] = vtemp[10];
1638 sep[k] = vtemp[11];
1639 roc[k] = vtemp[12];
1640 mname[k] = vtemps;
1641 }
1642 }
1643
1644 // -----------------------------------------------------------------------
1645 // Second part of evaluation process
1646 // --> compute correlations among MVAs
1647 // --> compute correlations between input variables and MVA (determines importance)
1648 // --> count overlaps
1649 // -----------------------------------------------------------------------
1650 if (fCorrelations) {
1651 const Int_t nmeth = methodsNoCuts.size();
1652 MethodBase *method = dynamic_cast<MethodBase *>(methods[0][0]);
1653 const Int_t nvar = method->fDataSetInfo.GetNVariables();
1654 if (!doRegression && !doMulticlass) {
1655
1656 if (nmeth > 0) {
1657
1658 // needed for correlations
1659 Double_t *dvec = new Double_t[nmeth + nvar];
1660 std::vector<Double_t> rvec;
1661
1662 // for correlations
1663 TPrincipal *tpSig = new TPrincipal(nmeth + nvar, "");
1664 TPrincipal *tpBkg = new TPrincipal(nmeth + nvar, "");
1665
1666 // set required tree branch references
1667 Int_t ivar = 0;
1668 std::vector<TString> *theVars = new std::vector<TString>;
1669 std::vector<ResultsClassification *> mvaRes;
1670 for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end();
1671 ++itrMethod, ++ivar) {
1672 MethodBase *m = dynamic_cast<MethodBase *>(*itrMethod);
1673 if (m == 0)
1674 continue;
1675 theVars->push_back(m->GetTestvarName());
1676 rvec.push_back(m->GetSignalReferenceCut());
1677 theVars->back().ReplaceAll("MVA_", "");
1678 mvaRes.push_back(dynamic_cast<ResultsClassification *>(
1679 m->Data()->GetResults(m->GetMethodName(), Types::kTesting, Types::kMaxAnalysisType)));
1680 }
1681
1682 // for overlap study
1685 (*overlapS) *= 0; // init...
1686 (*overlapB) *= 0; // init...
1687
1688 // loop over test tree
1689 DataSet *defDs = method->fDataSetInfo.GetDataSet();
1690 defDs->SetCurrentType(Types::kTesting);
1691 for (Int_t ievt = 0; ievt < defDs->GetNEvents(); ievt++) {
1692 const Event *ev = defDs->GetEvent(ievt);
1693
1694 // for correlations
1695 TMatrixD *theMat = 0;
1696 for (Int_t im = 0; im < nmeth; im++) {
1697 // check for NaN value
1698 Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
1699 if (TMath::IsNaN(retval)) {
1700 Log() << kWARNING << "Found NaN return value in event: " << ievt << " for method \""
1701 << methodsNoCuts[im]->GetName() << "\"" << Endl;
1702 dvec[im] = 0;
1703 } else
1704 dvec[im] = retval;
1705 }
1706 for (Int_t iv = 0; iv < nvar; iv++)
1707 dvec[iv + nmeth] = (Double_t)ev->GetValue(iv);
1708 if (method->fDataSetInfo.IsSignal(ev)) {
1709 tpSig->AddRow(dvec);
1710 theMat = overlapS;
1711 } else {
1712 tpBkg->AddRow(dvec);
1713 theMat = overlapB;
1714 }
1715
1716 // count overlaps
1717 for (Int_t im = 0; im < nmeth; im++) {
1718 for (Int_t jm = im; jm < nmeth; jm++) {
1719 if ((dvec[im] - rvec[im]) * (dvec[jm] - rvec[jm]) > 0) {
1720 (*theMat)(im, jm)++;
1721 if (im != jm)
1722 (*theMat)(jm, im)++;
1723 }
1724 }
1725 }
1726 }
1727
1728 // renormalise overlap matrix
1729 (*overlapS) *= (1.0 / defDs->GetNEvtSigTest()); // init...
1730 (*overlapB) *= (1.0 / defDs->GetNEvtBkgdTest()); // init...
1731
1732 tpSig->MakePrincipals();
1733 tpBkg->MakePrincipals();
1734
1735 const TMatrixD *covMatS = tpSig->GetCovarianceMatrix();
1736 const TMatrixD *covMatB = tpBkg->GetCovarianceMatrix();
1737
1740
1741 // print correlation matrices
1742 if (corrMatS != 0 && corrMatB != 0) {
1743
1744 // extract MVA matrix
1747 for (Int_t im = 0; im < nmeth; im++) {
1748 for (Int_t jm = 0; jm < nmeth; jm++) {
1749 mvaMatS(im, jm) = (*corrMatS)(im, jm);
1750 mvaMatB(im, jm) = (*corrMatB)(im, jm);
1751 }
1752 }
1753
1754 // extract variables - to MVA matrix
1755 std::vector<TString> theInputVars;
1756 TMatrixD varmvaMatS(nvar, nmeth);
1757 TMatrixD varmvaMatB(nvar, nmeth);
1758 for (Int_t iv = 0; iv < nvar; iv++) {
1759 theInputVars.push_back(method->fDataSetInfo.GetVariableInfo(iv).GetLabel());
1760 for (Int_t jm = 0; jm < nmeth; jm++) {
1761 varmvaMatS(iv, jm) = (*corrMatS)(nmeth + iv, jm);
1762 varmvaMatB(iv, jm) = (*corrMatB)(nmeth + iv, jm);
1763 }
1764 }
1765
1766 if (nmeth > 1) {
1767 Log() << kINFO << Endl;
1768 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1769 << "Inter-MVA correlation matrix (signal):" << Endl;
1771 Log() << kINFO << Endl;
1772
1773 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1774 << "Inter-MVA correlation matrix (background):" << Endl;
1776 Log() << kINFO << Endl;
1777 }
1778
1779 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1780 << "Correlations between input variables and MVA response (signal):" << Endl;
1782 Log() << kINFO << Endl;
1783
1784 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1785 << "Correlations between input variables and MVA response (background):" << Endl;
1787 Log() << kINFO << Endl;
1788 } else
1789 Log() << kWARNING << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1790 << "<TestAllMethods> cannot compute correlation matrices" << Endl;
1791
1792 // print overlap matrices
1793 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1794 << "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
1795 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1796 << "the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" << Endl;
1797 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1798 << "An event is signal-like, if its MVA output exceeds the following value:" << Endl;
1799 gTools().FormattedOutput(rvec, *theVars, "Method", "Cut value", Log());
1800 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1801 << "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
1802
1803 // give notice that cut method has been excluded from this test
1804 if (nmeth != (Int_t)methods->size())
1805 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1806 << "Note: no correlations and overlap with cut method are provided at present" << Endl;
1807
1808 if (nmeth > 1) {
1809 Log() << kINFO << Endl;
1810 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1811 << "Inter-MVA overlap matrix (signal):" << Endl;
1813 Log() << kINFO << Endl;
1814
1815 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
1816 << "Inter-MVA overlap matrix (background):" << Endl;
1818 }
1819
1820 // cleanup
1821 delete tpSig;
1822 delete tpBkg;
1823 delete corrMatS;
1824 delete corrMatB;
1825 delete theVars;
1826 delete overlapS;
1827 delete overlapB;
1828 delete[] dvec;
1829 }
1830 }
1831 }
1832 // -----------------------------------------------------------------------
1833 // Third part of evaluation process
1834 // --> output
1835 // -----------------------------------------------------------------------
1836
1837 if (doRegression) {
1838
1839 Log() << kINFO << Endl;
1840 TString hLine =
1841 "--------------------------------------------------------------------------------------------------";
1842 Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
1843 Log() << kINFO << "(\"Bias\" quotes the mean deviation of the regression from true target." << Endl;
1844 Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
1845 Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
1846 Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
1847 Log() << kINFO << hLine << Endl;
1848 // Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf
1849 // MutInf_T" << Endl;
1850 Log() << kINFO << hLine << Endl;
1851
1852 for (Int_t i = 0; i < nmeth_used[0]; i++) {
1853 MethodBase *theMethod = dynamic_cast<MethodBase *>((*methods)[i]);
1854 if (theMethod == 0)
1855 continue;
1856
1857 Log() << kINFO
1858 << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f", theMethod->fDataSetInfo.GetName(),
1859 (const char *)mname[0][i], biastest[0][i], biastestT[0][i], rmstest[0][i], rmstestT[0][i],
1860 minftest[0][i], minftestT[0][i])
1861 << Endl;
1862 }
1863 Log() << kINFO << hLine << Endl;
1864 Log() << kINFO << Endl;
1865 Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
1866 Log() << kINFO << "(overtraining check)" << Endl;
1867 Log() << kINFO << hLine << Endl;
1868 Log() << kINFO
1869 << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T"
1870 << Endl;
1871 Log() << kINFO << hLine << Endl;
1872
1873 for (Int_t i = 0; i < nmeth_used[0]; i++) {
1874 MethodBase *theMethod = dynamic_cast<MethodBase *>((*methods)[i]);
1875 if (theMethod == 0)
1876 continue;
1877 Log() << kINFO
1878 << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f", theMethod->fDataSetInfo.GetName(),
1879 (const char *)mname[0][i], biastrain[0][i], biastrainT[0][i], rmstrain[0][i], rmstrainT[0][i],
1880 minftrain[0][i], minftrainT[0][i])
1881 << Endl;
1882 }
1883 Log() << kINFO << hLine << Endl;
1884 Log() << kINFO << Endl;
1885 } else if (doMulticlass) {
1886 // ====================================================================
1887 // === Multiclass Output
1888 // ====================================================================
1889
1890 TString hLine =
1891 "-------------------------------------------------------------------------------------------------------";
1892
1893 // This part uses a genetic alg. to evaluate the optimal sig eff * sig pur.
1894 // This is why it is disabled for now.
1895 //
1896 // // --- Acheivable signal efficiency * signal purity
1897 // // --------------------------------------------------------------------
1898 // Log() << kINFO << Endl;
1899 // Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
1900 // Log() << kINFO << hLine << Endl;
1901
1902 // // iterate over methods and evaluate
1903 // for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1904 // MethodBase *theMethod = dynamic_cast<MethodBase *>(*itrMethod);
1905 // if (theMethod == 0) {
1906 // continue;
1907 // }
1908
1909 // TString header = "DataSet Name MVA Method ";
1910 // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1911 // header += TString::Format("%-12s ", theMethod->fDataSetInfo.GetClassInfo(icls)->GetName());
1912 // }
1913
1914 // Log() << kINFO << header << Endl;
1915 // Log() << kINFO << hLine << Endl;
1916 // for (Int_t i = 0; i < nmeth_used[0]; i++) {
1917 // TString res = TString::Format("[%-14s] %-15s", theMethod->fDataSetInfo.GetName(), mname[0][i].Data());
1918 // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1919 // res += TString::Format("%#1.3f ", (multiclass_testEff[i][icls]) * (multiclass_testPur[i][icls]));
1920 // }
1921 // Log() << kINFO << res << Endl;
1922 // }
1923
1924 // Log() << kINFO << hLine << Endl;
1925 // Log() << kINFO << Endl;
1926 // }
1927
1928 // --- 1 vs Rest ROC AUC, signal efficiency @ given background efficiency
1929 // --------------------------------------------------------------------
1930 TString header1 = TString::Format("%-15s%-15s%-15s%-15s%-15s%-15s", "Dataset", "MVA Method", "ROC AUC", "Sig eff@B=0.01",
1931 "Sig eff@B=0.10", "Sig eff@B=0.30");
1932 TString header2 = TString::Format("%-15s%-15s%-15s%-15s%-15s%-15s", "Name:", "/ Class:", "test (train)", "test (train)",
1933 "test (train)", "test (train)");
1934 Log() << kINFO << Endl;
1935 Log() << kINFO << "1-vs-rest performance metrics per class" << Endl;
1936 Log() << kINFO << hLine << Endl;
1937 Log() << kINFO << Endl;
1938 Log() << kINFO << "Considers the listed class as signal and the other classes" << Endl;
1939 Log() << kINFO << "as background, reporting the resulting binary performance." << Endl;
1940 Log() << kINFO << "A score of 0.820 (0.850) means 0.820 was acheived on the" << Endl;
1941 Log() << kINFO << "test set and 0.850 on the training set." << Endl;
1942
1943 Log() << kINFO << Endl;
1944 Log() << kINFO << header1 << Endl;
1945 Log() << kINFO << header2 << Endl;
1946 for (Int_t k = 0; k < 2; k++) {
1947 for (Int_t i = 0; i < nmeth_used[k]; i++) {
1948 if (k == 1) {
1949 mname[k][i].ReplaceAll("Variable_", "");
1950 }
1951
1952 const TString datasetName = itrMap->first;
1953 const TString mvaName = mname[k][i];
1954
1955 MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, mvaName));
1956 if (theMethod == 0) {
1957 continue;
1958 }
1959
1960 Log() << kINFO << Endl;
1961 TString row = TString::Format("%-15s%-15s", datasetName.Data(), mvaName.Data());
1962 Log() << kINFO << row << Endl;
1963 Log() << kINFO << "------------------------------" << Endl;
1964
1965 UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
1966 for (UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1967
1970
1971 const TString className = theMethod->DataInfo().GetClassInfo(iClass)->GetName();
1972 const Double_t rocaucTrain = rocCurveTrain->GetROCIntegral();
1973 const Double_t effB01Train = rocCurveTrain->GetEffSForEffB(0.01);
1974 const Double_t effB10Train = rocCurveTrain->GetEffSForEffB(0.10);
1975 const Double_t effB30Train = rocCurveTrain->GetEffSForEffB(0.30);
1976 const Double_t rocaucTest = rocCurveTest->GetROCIntegral();
1977 const Double_t effB01Test = rocCurveTest->GetEffSForEffB(0.01);
1978 const Double_t effB10Test = rocCurveTest->GetEffSForEffB(0.10);
1979 const Double_t effB30Test = rocCurveTest->GetEffSForEffB(0.30);
1980 const TString rocaucCmp = TString::Format("%5.3f (%5.3f)", rocaucTest, rocaucTrain);
1981 const TString effB01Cmp = TString::Format("%5.3f (%5.3f)", effB01Test, effB01Train);
1982 const TString effB10Cmp = TString::Format("%5.3f (%5.3f)", effB10Test, effB10Train);
1983 const TString effB30Cmp = TString::Format("%5.3f (%5.3f)", effB30Test, effB30Train);
1984 row = TString::Format("%-15s%-15s%-15s%-15s%-15s%-15s", "", className.Data(), rocaucCmp.Data(), effB01Cmp.Data(),
1985 effB10Cmp.Data(), effB30Cmp.Data());
1986 Log() << kINFO << row << Endl;
1987
1988 delete rocCurveTrain;
1989 delete rocCurveTest;
1990 }
1991 }
1992 }
1993 Log() << kINFO << Endl;
1994 Log() << kINFO << hLine << Endl;
1995 Log() << kINFO << Endl;
1996
1997 // --- Confusion matrices
1998 // --------------------------------------------------------------------
1999 auto printMatrix = [](TMatrixD const &matTraining, TMatrixD const &matTesting, std::vector<TString> classnames,
2000 UInt_t numClasses, MsgLogger &stream) {
2001 // assert (classLabledWidth >= valueLabelWidth + 2)
2002 // if (...) {Log() << kWARN << "..." << Endl; }
2003
2004 // TODO: Ensure matrices are same size.
2005
2006 TString header = TString::Format(" %-14s", " ");
2007 TString headerInfo = TString::Format(" %-14s", " ");
2008
2009 for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
2010 header += TString::Format(" %-14s", classnames[iCol].Data());
2011 headerInfo += TString::Format(" %-14s", " test (train)");
2012 }
2013 stream << kINFO << header << Endl;
2014 stream << kINFO << headerInfo << Endl;
2015
2016 for (UInt_t iRow = 0; iRow < numClasses; ++iRow) {
2017 stream << kINFO << TString::Format(" %-14s", classnames[iRow].Data());
2018
2019 for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
2020 if (iCol == iRow) {
2021 stream << kINFO << TString::Format(" %-14s", "-");
2022 } else {
2025 TString entry = TString::Format("%-5.3f (%-5.3f)", testValue, trainValue);
2026 stream << kINFO << TString::Format(" %-14s", entry.Data());
2027 }
2028 }
2029 stream << kINFO << Endl;
2030 }
2031 };
2032
2033 Log() << kINFO << Endl;
2034 Log() << kINFO << "Confusion matrices for all methods" << Endl;
2035 Log() << kINFO << hLine << Endl;
2036 Log() << kINFO << Endl;
2037 Log() << kINFO << "Does a binary comparison between the two classes given by a " << Endl;
2038 Log() << kINFO << "particular row-column combination. In each case, the class " << Endl;
2039 Log() << kINFO << "given by the row is considered signal while the class given " << Endl;
2040 Log() << kINFO << "by the column index is considered background." << Endl;
2041 Log() << kINFO << Endl;
2042 for (UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
2043 MethodBase *theMethod = dynamic_cast<MethodBase *>(methods->at(iMethod));
2044 if (theMethod == nullptr) {
2045 continue;
2046 }
2047 UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
2048
2049 std::vector<TString> classnames;
2050 for (UInt_t iCls = 0; iCls < numClasses; ++iCls) {
2051 classnames.push_back(theMethod->fDataSetInfo.GetClassInfo(iCls)->GetName());
2052 }
2053 Log() << kINFO
2054 << "=== Showing confusion matrix for method : " << Form("%-15s", (const char *)mname[0][iMethod])
2055 << Endl;
2056 Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.01%)" << Endl;
2057 Log() << kINFO << "---------------------------------------------------" << Endl;
2059 numClasses, Log());
2060 Log() << kINFO << Endl;
2061
2062 Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.10%)" << Endl;
2063 Log() << kINFO << "---------------------------------------------------" << Endl;
2065 numClasses, Log());
2066 Log() << kINFO << Endl;
2067
2068 Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.30%)" << Endl;
2069 Log() << kINFO << "---------------------------------------------------" << Endl;
2071 numClasses, Log());
2072 Log() << kINFO << Endl;
2073 }
2074 Log() << kINFO << hLine << Endl;
2075 Log() << kINFO << Endl;
2076
2077 } else {
2078 // Binary classification
2079 if (fROC) {
2080 Log().EnableOutput();
2082 Log() << Endl;
2083 TString hLine = "------------------------------------------------------------------------------------------"
2084 "-------------------------";
2085 Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
2086 Log() << kINFO << hLine << Endl;
2087 Log() << kINFO << "DataSet MVA " << Endl;
2088 Log() << kINFO << "Name: Method: ROC-integ" << Endl;
2089
2090 // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error):
2091 // | Sepa- Signifi- " << Endl; Log() << kDEBUG << "Name: Method: @B=0.01
2092 // @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl;
2093 Log() << kDEBUG << hLine << Endl;
2094 for (Int_t k = 0; k < 2; k++) {
2095 if (k == 1 && nmeth_used[k] > 0) {
2096 Log() << kINFO << hLine << Endl;
2097 Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2098 }
2099 for (Int_t i = 0; i < nmeth_used[k]; i++) {
2100 TString datasetName = itrMap->first;
2101 TString methodName = mname[k][i];
2102
2103 if (k == 1) {
2104 methodName.ReplaceAll("Variable_", "");
2105 }
2106
2107 MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, methodName));
2108 if (theMethod == 0) {
2109 continue;
2110 }
2111
2112 TMVA::DataSet *dataset = theMethod->Data();
2113 TMVA::Results *results = dataset->GetResults(methodName, Types::kTesting, this->fAnalysisType);
2114 std::vector<Bool_t> *mvaResType =
2115 dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
2116
2117 Double_t rocIntegral = 0.0;
2118 if (mvaResType->size() != 0) {
2119 rocIntegral = GetROCIntegral(datasetName, methodName);
2120 }
2121
2122 if (sep[k][i] < 0 || sig[k][i] < 0) {
2123 // cannot compute separation/significance -> no MVA (usually for Cuts)
2124 Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), effArea[k][i])
2125 << Endl;
2126
2127 // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
2128 // %#1.3f %#1.3f | -- --",
2129 // datasetName.Data(),
2130 // methodName.Data(),
2131 // eff01[k][i], Int_t(1000*eff01err[k][i]),
2132 // eff10[k][i], Int_t(1000*eff10err[k][i]),
2133 // eff30[k][i], Int_t(1000*eff30err[k][i]),
2134 // effArea[k][i],rocIntegral) << Endl;
2135 } else {
2136 Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), rocIntegral)
2137 << Endl;
2138 // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
2139 // %#1.3f %#1.3f | %#1.3f %#1.3f",
2140 // datasetName.Data(),
2141 // methodName.Data(),
2142 // eff01[k][i], Int_t(1000*eff01err[k][i]),
2143 // eff10[k][i], Int_t(1000*eff10err[k][i]),
2144 // eff30[k][i], Int_t(1000*eff30err[k][i]),
2145 // effArea[k][i],rocIntegral,
2146 // sep[k][i], sig[k][i]) << Endl;
2147 }
2148 }
2149 }
2150 Log() << kINFO << hLine << Endl;
2151 Log() << kINFO << Endl;
2152 Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
2153 Log() << kINFO << hLine << Endl;
2154 Log() << kINFO
2155 << "DataSet MVA Signal efficiency: from test sample (from training sample) "
2156 << Endl;
2157 Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2158 << Endl;
2159 Log() << kINFO << hLine << Endl;
2160 for (Int_t k = 0; k < 2; k++) {
2161 if (k == 1 && nmeth_used[k] > 0) {
2162 Log() << kINFO << hLine << Endl;
2163 Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2164 }
2165 for (Int_t i = 0; i < nmeth_used[k]; i++) {
2166 if (k == 1)
2167 mname[k][i].ReplaceAll("Variable_", "");
2168 MethodBase *theMethod = dynamic_cast<MethodBase *>((*methods)[i]);
2169 if (theMethod == 0)
2170 continue;
2171
2172 Log() << kINFO
2173 << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2174 theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i],
2175 trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2176 << Endl;
2177 }
2178 }
2179 Log() << kINFO << hLine << Endl;
2180 Log() << kINFO << Endl;
2181
2182 if (gTools().CheckForSilentOption(GetOptions()))
2183 Log().InhibitOutput();
2184 } // end fROC
2185 }
2186 if (!IsSilentFile()) {
2187 std::list<TString> datasets;
2188 for (Int_t k = 0; k < 2; k++) {
2189 for (Int_t i = 0; i < nmeth_used[k]; i++) {
2190 MethodBase *theMethod = dynamic_cast<MethodBase *>((*methods)[i]);
2191 if (theMethod == 0)
2192 continue;
2193 // write test/training trees
2194 RootBaseDir()->cd(theMethod->fDataSetInfo.GetName());
2195 if (std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end()) {
2196 theMethod->fDataSetInfo.GetDataSet()->GetTree(Types::kTesting)->Write("", TObject::kOverwrite);
2197 theMethod->fDataSetInfo.GetDataSet()->GetTree(Types::kTraining)->Write("", TObject::kOverwrite);
2198 datasets.push_back(theMethod->fDataSetInfo.GetName());
2199 }
2200 }
2201 }
2202 }
2203 } // end for MethodsMap
2204 // references for citation
2206}
2207
2208////////////////////////////////////////////////////////////////////////////////
2209/// Evaluate Variable Importance
2210
2212 const char *theOption)
2213{
2214 fModelPersistence = kFALSE;
2215 fSilentFile = kTRUE; // we need silent file here because we need fast classification results
2216
2217 // getting number of variables and variable names from loader
2218 const int nbits = loader->GetDataSetInfo().GetNVariables();
2219 if (vitype == VIType::kShort)
2220 return EvaluateImportanceShort(loader, theMethod, methodTitle, theOption);
2221 else if (vitype == VIType::kAll)
2222 return EvaluateImportanceAll(loader, theMethod, methodTitle, theOption);
2223 else if (vitype == VIType::kRandom) {
2224 if ( nbits > 10 && nbits < 30) {
2225 // limit nbits to less than 30 to avoid error converting from double to uint and also cannot deal with too many combinations
2226 return EvaluateImportanceRandom(loader, static_cast<UInt_t>( pow(2, nbits) ), theMethod, methodTitle, theOption);
2227 } else if (nbits < 10) {
2228 Log() << kERROR << "Error in Variable Importance: Random mode require more that 10 variables in the dataset."
2229 << Endl;
2230 } else if (nbits > 30) {
2231 Log() << kERROR << "Error in Variable Importance: Number of variables is too large for Random mode"
2232 << Endl;
2233 }
2234 }
2235 return nullptr;
2236}
2237
2238////////////////////////////////////////////////////////////////////////////////
2239
2241 const char *theOption)
2242{
2243
2244 uint64_t x = 0;
2245 uint64_t y = 0;
2246
2247 // getting number of variables and variable names from loader
2248 const int nbits = loader->GetDataSetInfo().GetNVariables();
2249 std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2250
2251 if (nbits > 60) {
2252 Log() << kERROR << "Number of combinations is too large , is 2^" << nbits << Endl;
2253 return nullptr;
2254 }
2255 if (nbits > 20) {
2256 Log() << kWARNING << "Number of combinations is very large , is 2^" << nbits << Endl;
2257 }
2258 uint64_t range = static_cast<uint64_t>(pow(2, nbits));
2259
2260
2261 // vector to save importances
2262 std::vector<Double_t> importances(nbits);
2263 // vector to save ROC
2264 std::vector<Double_t> ROC(range);
2265 ROC[0] = 0.5;
2266 for (int i = 0; i < nbits; i++)
2267 importances[i] = 0;
2268
2269 Double_t SROC, SSROC; // computed ROC value
2270 for (x = 1; x < range; x++) {
2271
2272 std::bitset<VIBITS> xbitset(x);
2273 if (x == 0)
2274 continue; // data loader need at least one variable
2275
2276 // creating loader for seed
2278
2279 // adding variables from seed
2280 for (int index = 0; index < nbits; index++) {
2281 if (xbitset[index])
2282 seedloader->AddVariable(varNames[index], 'F');
2283 }
2284
2286 seedloader->PrepareTrainingAndTestTree(loader->GetDataSetInfo().GetCut("Signal"),
2287 loader->GetDataSetInfo().GetCut("Background"),
2288 loader->GetDataSetInfo().GetSplitOptions());
2289
2290 // Booking Seed
2291 BookMethod(seedloader, theMethod, methodTitle, theOption);
2292
2293 // Train/Test/Evaluation
2294 TrainAllMethods();
2295 TestAllMethods();
2296 EvaluateAllMethods();
2297
2298 // getting ROC
2299 ROC[x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2300
2301 // cleaning information to process sub-seeds
2302 TMVA::MethodBase *smethod = dynamic_cast<TMVA::MethodBase *>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2305 delete sresults;
2306 delete seedloader;
2307 this->DeleteAllMethods();
2308
2309 fMethodsMap.clear();
2310 // removing global result because it is requiring a lot of RAM for all seeds
2311 }
2312
2313 for (x = 0; x < range; x++) {
2314 SROC = ROC[x];
2315 for (uint32_t i = 0; i < VIBITS; ++i) {
2316 if (x & (uint64_t(1) << i)) {
2317 y = x & ~(1 << i);
2318 std::bitset<VIBITS> ybitset(y);
2319 // need at least one variable
2320 // NOTE: if sub-seed is zero then is the special case
2321 // that count in xbitset is 1
2322 uint32_t ny = static_cast<uint32_t>( log(x - y) / 0.693147 ) ;
2323 if (y == 0) {
2324 importances[ny] = SROC - 0.5;
2325 continue;
2326 }
2327
2328 // getting ROC
2329 SSROC = ROC[y];
2330 importances[ny] += SROC - SSROC;
2331 // cleaning information
2332 }
2333 }
2334 }
2335 std::cout << "--- Variable Importance Results (All)" << std::endl;
2336 return GetImportance(nbits, importances, varNames);
2337}
2338
////////////////////////////////////////////////////////////////////////////////
/// Return 2^(i+1) - 1, i.e. a value whose bits 0..i are all set.
/// Used by EvaluateImportanceShort to build the "all variables" seed mask
/// for a dataset of i variables.
///
/// \param i  highest bit index to set.
/// \return   2^(i+1) - 1, or 0 if i > 62 (the result would overflow uint64_t).

static uint64_t sum(uint64_t i)
{
   // Guard against overflow: for i > 62 the mask does not fit in 64 bits.
   if (i > 62)
      return 0;
   // Exact integer arithmetic (shift) instead of the previous
   // std::pow-based computation, which went through double precision.
   return (uint64_t(1) << (i + 1)) - 1;
}
2349
2350////////////////////////////////////////////////////////////////////////////////
2351
2353 const char *theOption)
2354{
2355 uint64_t x = 0;
2356 uint64_t y = 0;
2357
2358 // getting number of variables and variable names from loader
2359 const int nbits = loader->GetDataSetInfo().GetNVariables();
2360 std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2361
2362 if (nbits > 60) {
2363 Log() << kERROR << "Number of combinations is too large , is 2^" << nbits << Endl;
2364 return nullptr;
2365 }
2366 long int range = sum(nbits);
2367 // std::cout<<range<<std::endl;
2368 // vector to save importances
2369 std::vector<Double_t> importances(nbits);
2370 for (int i = 0; i < nbits; i++)
2371 importances[i] = 0;
2372
2373 Double_t SROC, SSROC; // computed ROC value
2374
2375 x = range;
2376
2377 std::bitset<VIBITS> xbitset(x);
2378 if (x == 0)
2379 Log() << kFATAL << "Error: need at least one variable."; // data loader need at least one variable
2380
2381 // creating loader for seed
2383
2384 // adding variables from seed
2385 for (int index = 0; index < nbits; index++) {
2386 if (xbitset[index])
2387 seedloader->AddVariable(varNames[index], 'F');
2388 }
2389
2390 // Loading Dataset
2392
2393 // Booking Seed
2394 BookMethod(seedloader, theMethod, methodTitle, theOption);
2395
2396 // Train/Test/Evaluation
2397 TrainAllMethods();
2398 TestAllMethods();
2399 EvaluateAllMethods();
2400
2401 // getting ROC
2402 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2403
2404 // cleaning information to process sub-seeds
2405 TMVA::MethodBase *smethod = dynamic_cast<TMVA::MethodBase *>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2408 delete sresults;
2409 delete seedloader;
2410 this->DeleteAllMethods();
2411 fMethodsMap.clear();
2412
2413 // removing global result because it is requiring a lot of RAM for all seeds
2414
2415 for (uint32_t i = 0; i < VIBITS; ++i) {
2416 if (x & (1 << i)) {
2417 y = x & ~(uint64_t(1) << i);
2418 std::bitset<VIBITS> ybitset(y);
2419 // need at least one variable
2420 // NOTE: if sub-seed is zero then is the special case
2421 // that count in xbitset is 1
2422 uint32_t ny = static_cast<uint32_t>(log(x - y) / 0.693147);
2423 if (y == 0) {
2424 importances[ny] = SROC - 0.5;
2425 continue;
2426 }
2427
2428 // creating loader for sub-seed
2430 // adding variables from sub-seed
2431 for (int index = 0; index < nbits; index++) {
2432 if (ybitset[index])
2433 subseedloader->AddVariable(varNames[index], 'F');
2434 }
2435
2436 // Loading Dataset
2438
2439 // Booking SubSeed
2440 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2441
2442 // Train/Test/Evaluation
2443 TrainAllMethods();
2444 TestAllMethods();
2445 EvaluateAllMethods();
2446
2447 // getting ROC
2448 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2449 importances[ny] += SROC - SSROC;
2450
2451 // cleaning information
2452 TMVA::MethodBase *ssmethod = dynamic_cast<TMVA::MethodBase *>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2455 delete ssresults;
2456 delete subseedloader;
2457 this->DeleteAllMethods();
2458 fMethodsMap.clear();
2459 }
2460 }
2461 std::cout << "--- Variable Importance Results (Short)" << std::endl;
2462 return GetImportance(nbits, importances, varNames);
2463}
2464
2465////////////////////////////////////////////////////////////////////////////////
2466
2468 TString methodTitle, const char *theOption)
2469{
2470 TRandom3 *rangen = new TRandom3(0); // Random Gen.
2471
2472 uint64_t x = 0;
2473 uint64_t y = 0;
2474
2475 // getting number of variables and variable names from loader
2476 const int nbits = loader->GetDataSetInfo().GetNVariables();
2477 std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2478
2479 long int range = pow(2, nbits);
2480
2481 // vector to save importances
2482 std::vector<Double_t> importances(nbits);
2483 for (int i = 0; i < nbits; i++)
2484 importances[i] = 0;
2485
2486 Double_t SROC, SSROC; // computed ROC value
2487 for (UInt_t n = 0; n < nseeds; n++) {
2488 x = rangen->Integer(range);
2489
2490 std::bitset<32> xbitset(x);
2491 if (x == 0)
2492 continue; // data loader need at least one variable
2493
2494 // creating loader for seed
2496
2497 // adding variables from seed
2498 for (int index = 0; index < nbits; index++) {
2499 if (xbitset[index])
2500 seedloader->AddVariable(varNames[index], 'F');
2501 }
2502
2503 // Loading Dataset
2505
2506 // Booking Seed
2507 BookMethod(seedloader, theMethod, methodTitle, theOption);
2508
2509 // Train/Test/Evaluation
2510 TrainAllMethods();
2511 TestAllMethods();
2512 EvaluateAllMethods();
2513
2514 // getting ROC
2515 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2516 // std::cout << "Seed: n " << n << " x " << x << " xbitset:" << xbitset << " ROC " << SROC << std::endl;
2517
2518 // cleaning information to process sub-seeds
2519 TMVA::MethodBase *smethod = dynamic_cast<TMVA::MethodBase *>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2522 delete sresults;
2523 delete seedloader;
2524 this->DeleteAllMethods();
2525 fMethodsMap.clear();
2526
2527 // removing global result because it is requiring a lot of RAM for all seeds
2528
2529 for (uint32_t i = 0; i < 32; ++i) {
2530 if (x & (uint64_t(1) << i)) {
2531 y = x & ~(1 << i);
2532 std::bitset<32> ybitset(y);
2533 // need at least one variable
2534 // NOTE: if sub-seed is zero then is the special case
2535 // that count in xbitset is 1
2536 Double_t ny = log(x - y) / 0.693147;
2537 if (y == 0) {
2538 importances[ny] = SROC - 0.5;
2539 // std::cout << "SubSeed: " << y << " y:" << ybitset << "ROC " << 0.5 << std::endl;
2540 continue;
2541 }
2542
2543 // creating loader for sub-seed
2545 // adding variables from sub-seed
2546 for (int index = 0; index < nbits; index++) {
2547 if (ybitset[index])
2548 subseedloader->AddVariable(varNames[index], 'F');
2549 }
2550
2551 // Loading Dataset
2553
2554 // Booking SubSeed
2555 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2556
2557 // Train/Test/Evaluation
2558 TrainAllMethods();
2559 TestAllMethods();
2560 EvaluateAllMethods();
2561
2562 // getting ROC
2563 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2564 importances[ny] += SROC - SSROC;
2565 // std::cout << "SubSeed: " << y << " y:" << ybitset << " x-y " << x - y << " " << std::bitset<32>(x - y) <<
2566 // " ny " << ny << " SROC " << SROC << " SSROC " << SSROC << " Importance = " << importances[ny] <<
2567 // std::endl; cleaning information
2569 dynamic_cast<TMVA::MethodBase *>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2572 delete ssresults;
2573 delete subseedloader;
2574 this->DeleteAllMethods();
2575 fMethodsMap.clear();
2576 }
2577 }
2578 }
2579 std::cout << "--- Variable Importance Results (Random)" << std::endl;
2580 return GetImportance(nbits, importances, varNames);
2581}
2582
2583////////////////////////////////////////////////////////////////////////////////
2584
2585TH1F *TMVA::Factory::GetImportance(const int nbits, std::vector<Double_t> importances, std::vector<TString> varNames)
2586{
2587 TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);
2588
2589 gStyle->SetOptStat(000000);
2590
2591 Float_t normalization = 0.0;
2592 for (int i = 0; i < nbits; i++) {
2594 }
2595
2596 Float_t roc = 0.0;
2597
2598 gStyle->SetTitleXOffset(0.4);
2599 gStyle->SetTitleXOffset(1.2);
2600
2601 std::vector<Double_t> x_ie(nbits), y_ie(nbits);
2602 for (Int_t i = 1; i < nbits + 1; i++) {
2603 x_ie[i - 1] = (i - 1) * 1.;
2604 roc = 100.0 * importances[i - 1] / normalization;
2605 y_ie[i - 1] = roc;
2606 std::cout << "--- " << varNames[i - 1] << " = " << roc << " %" << std::endl;
2607 vih1->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
2608 vih1->SetBinContent(i, roc);
2609 }
2610 TGraph *g_ie = new TGraph(nbits + 2, &x_ie[0], &y_ie[0]);
2611 g_ie->SetTitle("");
2612
2613 vih1->LabelsOption("v >", "X");
2614 vih1->SetBarWidth(0.97);
2615 Int_t ca = TColor::GetColor("#006600");
2616 vih1->SetFillColor(ca);
2617 // Int_t ci = TColor::GetColor("#990000");
2618
2619 vih1->GetYaxis()->SetTitle("Importance (%)");
2620 vih1->GetYaxis()->SetTitleSize(0.045);
2621 vih1->GetYaxis()->CenterTitle();
2622 vih1->GetYaxis()->SetTitleOffset(1.24);
2623
2624 vih1->GetYaxis()->SetRangeUser(-7, 50);
2625 vih1->SetDirectory(nullptr);
2626
2627 // vih1->Draw("B");
2628 return vih1;
2629}
#define MinNoTrainingEvents
#define h(i)
Definition RSha256.hxx:106
void printMatrix(const TMatrixD &mat)
write a matrix
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
TMatrixT< Double_t > TMatrixD
Definition TMatrixDfwd.h:23
#define gROOT
Definition TROOT.h:411
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2495
R__EXTERN TStyle * gStyle
Definition TStyle.h:442
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
const_iterator begin() const
const_iterator end() const
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition TAttFill.h:38
virtual void SetLineWidth(Width_t lwidth)
Set the line width.
Definition TAttLine.h:45
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition TAttLine.h:42
The Canvas class.
Definition TCanvas.h:23
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
Definition TColor.cxx:1926
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:131
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
TAxis * GetXaxis() const
Get x axis of the graph.
Definition TGraph.cxx:1566
TAxis * GetYaxis() const
Get y axis of the graph.
Definition TGraph.cxx:1575
void SetTitle(const char *title="") override
Change (i.e.
Definition TGraph.cxx:2345
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:879
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition TH1.cxx:1263
Service class for 2-D histogram classes.
Definition TH2.h:30
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
TString fWeightFileDir
Definition Config.h:124
TString fWeightFileDirPrefix
Definition Config.h:123
void SetDrawProgressBar(Bool_t d)
Definition Config.h:69
void SetUseColor(Bool_t uc)
Definition Config.h:60
class TMVA::Config::VariablePlotting fVariablePlotting
void SetSilent(Bool_t s)
Definition Config.h:63
IONames & GetIONames()
Definition Config.h:98
void SetConfigDescription(const char *d)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
void SetConfigName(const char *n)
virtual void ParseOptions()
options parser
const TString & GetOptions() const
MsgLogger & Log() const
MsgLogger * fLogger
! message logger
void CheckForUnusedOptions() const
checks for unused options in option string
Class that contains all the data information.
Definition DataSetInfo.h:62
const TMatrixD * CorrelationMatrix(const TString &className) const
UInt_t GetNClasses() const
DataSet * GetDataSet() const
returns data set
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
const char * GetName() const override
Returns name of object.
Definition DataSetInfo.h:71
ClassInfo * GetClassInfo(Int_t clNum) const
Class that contains all the data information.
Definition DataSet.h:58
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:216
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:399
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Definition Factory.cxx:1327
Bool_t fCorrelations
! enable to calculate correlations
Definition Factory.h:224
std::vector< IMethod * > MVector
Definition Factory.h:84
void TrainAllMethods()
Iterates through all booked methods and calls training.
Definition Factory.cxx:1108
Bool_t Verbose(void) const
Definition Factory.h:143
void WriteDataInformation(DataSetInfo &fDataSetInfo)
Definition Factory.cxx:596
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
Definition Factory.cxx:112
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponi...
Definition Factory.cxx:1265
Bool_t fVerbose
! verbose mode
Definition Factory.h:222
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
Definition Factory.cxx:1370
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition Factory.cxx:2467
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Definition Factory.cxx:2585
Bool_t fROC
! enable to calculate ROC values
Definition Factory.h:225
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
Definition Factory.cxx:1354
TString fVerboseLevel
! verbosity level, controls granularity of logging
Definition Factory.h:223
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass, Types::ETreeType type=Types::kTesting)
Generate a collection of graphs, for all methods for a given class.
Definition Factory.cxx:982
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Definition Factory.cxx:2211
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Calculate the integral of the ROC curve, also known as the area under curve (AUC),...
Definition Factory.cxx:843
virtual ~Factory()
Destructor.
Definition Factory.cxx:305
MethodBase * BookMethod(DataLoader *loader, MethodName theMethodName, TString methodTitle, TString theOption="")
Books an MVA classifier or regression method.
Definition Factory.cxx:357
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Definition Factory.cxx:1299
MethodBase * BookMethodWeightfile(DataLoader *dataloader, TMVA::Types::EMVA methodType, const TString &weightfile)
Adds an already constructed method to be managed by this factory.
Definition Factory.cxx:495
Bool_t fModelPersistence
! option to save the trained model in xml file or using serialization
Definition Factory.h:231
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so does just that,...
Definition Factory.cxx:695
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Private method to generate a ROCCurve instance for a given method.
Definition Factory.cxx:743
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition Factory.cxx:2352
Types::EAnalysisType fAnalysisType
! the training type
Definition Factory.h:230
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
Definition Factory.cxx:580
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
Definition Factory.cxx:906
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition Factory.cxx:2240
void SetVerbose(Bool_t v=kTRUE)
Definition Factory.cxx:342
TFile * fgTargetFile
! ROOT output file
Definition Factory.h:214
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
Definition Factory.cxx:560
void DeleteAllMethods(void)
Delete methods.
Definition Factory.cxx:323
TString fTransformations
! list of transformations to test
Definition Factory.h:221
void Greetings()
Print welcome message.
Definition Factory.cxx:294
Interface for all concrete MVA method implementations.
Definition IMethod.h:53
Virtual base Class for all MVA method.
Definition MethodBase.h:111
const TString & GetMethodName() const
Definition MethodBase.h:331
Class for boosting a TMVA method.
Definition MethodBoost.h:58
Class for categorizing the phase space.
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
void SetMinType(EMsgType minType)
Definition MsgLogger.h:70
void SetSource(const std::string &source)
Definition MsgLogger.h:68
static void InhibitOutput()
Definition MsgLogger.cxx:66
Ranking for variables in method (implementation)
Definition Ranking.h:48
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Definition Results.h:57
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition Tools.cxx:887
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition Tools.cxx:1325
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=nullptr)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition Tools.cxx:564
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings
Definition Tools.cxx:1199
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:828
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition Tools.cxx:324
@ kHtmlLink
Definition Tools.h:212
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=nullptr)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition Tools.cxx:538
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
kinds of TMVA citation
Definition Tools.cxx:1440
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition Tools.cxx:1316
void TMVAWelcomeMessage()
direct output, eg, when starting ROOT session -> no use of Logger here
Definition Tools.cxx:1302
Class that contains all the data information.
Singleton class for Global types used by TMVA.
Definition Types.h:71
static Types & Instance()
The single instance of "Types" if existing already, or create it (Singleton)
Definition Types.cxx:70
@ kCategory
Definition Types.h:97
@ kMulticlass
Definition Types.h:129
@ kNoAnalysisType
Definition Types.h:130
@ kClassification
Definition Types.h:127
@ kMaxAnalysisType
Definition Types.h:131
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition TMultiGraph.h:34
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition TNamed.cxx:173
const char * GetName() const override
Returns name of object.
Definition TNamed.h:49
TString fName
Definition TNamed.h:32
@ kOverwrite
overwrite existing object with same name
Definition TObject.h:98
virtual const char * GetName() const
Returns name of object.
Definition TObject.cxx:457
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition TObject.cxx:964
void SetGrid(Int_t valuex=1, Int_t valuey=1) override
Definition TPad.h:336
TLegend * BuildLegend(Double_t x1=0.3, Double_t y1=0.21, Double_t x2=0.3, Double_t y2=0.21, const char *title="", Option_t *option="") override
Build a legend from the graphical objects in the pad.
Definition TPad.cxx:553
Principal Components Analysis (PCA)
Definition TPrincipal.h:21
Random number generator class based on M.
Definition TRandom3.h:27
Basic string class.
Definition TString.h:138
void ToLower()
Change string to lower-case.
Definition TString.cxx:1189
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition TString.cxx:464
const char * Data() const
Definition TString.h:384
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition TString.h:712
Bool_t IsNull() const
Definition TString.h:422
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:640
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition TStyle.cxx:1641
void SetTitleXOffset(Float_t offset=1)
Definition TStyle.h:413
virtual int MakeDirectory(const char *name)
Make a directory.
Definition TSystem.cxx:836
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Config & gConfig()
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Bool_t IsNaN(Double_t x)
Definition TMath.h:903
TMarker m
Definition textangle.C:8
#define VIBITS
Definition Factory.cxx:102
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2339
const Int_t MinNoTrainingEvents
Definition Factory.cxx:95
#define READXML
Definition Factory.cxx:99