Logo ROOT   6.18/05
Reference Guide
Classification.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata, Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan
3// Therhaag
4
6
8#include <TMVA/Config.h>
9#include <TMVA/Configurable.h>
10#include <TMVA/Tools.h>
11#include <TMVA/Ranking.h>
12#include <TMVA/DataSet.h>
13#include <TMVA/IMethod.h>
14#include <TMVA/MethodBase.h>
16#include <TMVA/DataSetManager.h>
17#include <TMVA/DataSetInfo.h>
18#include <TMVA/DataLoader.h>
19#include <TMVA/MethodBoost.h>
20#include <TMVA/MethodCategory.h>
21#include <TMVA/ROCCalc.h>
22#include <TMVA/ROCCurve.h>
23#include <TMVA/MsgLogger.h>
24
25#include <TMVA/VariableInfo.h>
27
28#include <TMVA/Types.h>
29
30#include <TROOT.h>
31#include <TFile.h>
32#include <TTree.h>
33#include <TLeaf.h>
34#include <TEventList.h>
35#include <TH2.h>
36#include <TText.h>
37#include <TLegend.h>
38#include <TGraph.h>
39#include <TStyle.h>
40#include <TMatrixF.h>
41#include <TMatrixDSym.h>
42#include <TMultiGraph.h>
43#include <TPaletteAxis.h>
44#include <TPrincipal.h>
45#include <TMath.h>
46#include <TObjString.h>
47#include <TSystem.h>
48#include <TCanvas.h>
49#include <iostream>
50#include <memory>
51#define MinNoTrainingEvents 10
52
53//_______________________________________________________________________
55{
56}
57
58//_______________________________________________________________________
60{
61 fMethod = cr.fMethod;
64 fMvaTest = cr.fMvaTest;
65 fIsCuts = cr.fIsCuts;
67}
68
69//_______________________________________________________________________
70/**
71 * Method to get ROC-Integral value from mvas.
72 * \param iClass category, default 0 then signal
73 * \param type train/test tree, default test.
74 * \return Double_t with the ROC-Integral value.
75 */
77{
78 if (fIsCuts) {
79 return fROCIntegral;
80 } else {
81 auto roc = GetROC(iClass, type);
82 auto inte = roc->GetROCIntegral();
83 delete roc;
84 return inte;
85 }
86}
87
88//_______________________________________________________________________
89/**
90 * Method to get TMVA::ROCCurve Object.
91 * \param iClass category, default 0 then signal
92 * \param type train/test tree, default test.
93 * \return TMVA::ROCCurve object.
94 */
96{
97 ROCCurve *fROCCurve = nullptr;
99 fROCCurve = new ROCCurve(fMvaTest[iClass]);
100 else
101 fROCCurve = new ROCCurve(fMvaTrain[iClass]);
102 return fROCCurve;
103}
104
105//_______________________________________________________________________
108{
109 fMethod = cr.fMethod;
110 fDataLoaderName = cr.fDataLoaderName;
111 fMvaTrain = cr.fMvaTrain;
112 fMvaTest = cr.fMvaTest;
113 fIsCuts = cr.fIsCuts;
114 fROCIntegral = cr.fROCIntegral;
115 return *this;
116}
117
118//_______________________________________________________________________
119/**
120 * Method to print the results in stdout.
121 * data loader name, method name/tittle and ROC-integ.
122 */
124{
125 MsgLogger fLogger("Classification");
128 TString hLine = "--------------------------------------------------- :";
129
130 fLogger << kINFO << hLine << Endl;
131 fLogger << kINFO << "DataSet MVA :" << Endl;
132 fLogger << kINFO << "Name: Method/Title: ROC-integ :" << Endl;
133 fLogger << kINFO << hLine << Endl;
134 fLogger << kINFO << Form("%-20s %-15s %#1.3f :", fDataLoaderName.Data(),
135 Form("%s/%s", fMethod.GetValue<TString>("MethodName").Data(),
136 fMethod.GetValue<TString>("MethodTitle").Data()),
137 GetROCIntegral())
138 << Endl;
139 fLogger << kINFO << hLine << Endl;
140
142}
143
144//_______________________________________________________________________
145/**
146 * Method to get TGraph object with the ROC curve.
147 * \param iClass category, default 0 then signal
148 * \param type train/test tree, default test.
149 * \return TGraph object.
150 */
152{
153 TGraph *roc = GetROC(iClass, type)->GetROCCurve();
154 roc->SetName(Form("%s/%s", GetMethodName().Data(), GetMethodTitle().Data()));
155 roc->SetTitle(Form("%s/%s", GetMethodName().Data(), GetMethodTitle().Data()));
156 roc->GetXaxis()->SetTitle(" Signal Efficiency ");
157 roc->GetYaxis()->SetTitle(" Background Rejection ");
158 return roc;
159}
160
161//_______________________________________________________________________
162/**
163 * Method to check if method was booked.
164 * \param methodname name of the method.
165 * \param methodtitle method title.
166 * \return boolean true if the method was booked, false in other case.
167 */
169{
170 return fMethod.GetValue<TString>("MethodName") == methodname &&
171 fMethod.GetValue<TString>("MethodTitle") == methodtitle
172 ? kTRUE
173 : kFALSE;
174}
175
176//_______________________________________________________________________
177/**
178 * Contructor to create a two class classifier.
179 * \param dataloader TMVA::DataLoader object with the data to train/test.
180 * \param file TFile object to save the results
181 * \param options string extra options.
182 */
184 : TMVA::Envelope("Classification", dataloader, file, options), fAnalysisType(Types::kClassification),
185 fCorrelations(kFALSE), fROC(kTRUE)
186{
187 DeclareOptionRef(fCorrelations, "Correlations", "boolean to show correlation in output");
188 DeclareOptionRef(fROC, "ROC", "boolean to show ROC in output");
189 ParseOptions();
191
193 gSystem->MakeDirectory(fDataLoader->GetName()); // creating directory for DataLoader output
194}
195
196//_______________________________________________________________________
197/**
198 * Contructor to create a two class classifier without output file.
199 * \param dataloader TMVA::DataLoader object with the data to train/test.
200 * \param options string extra options.
201 */
203 : TMVA::Envelope("Classification", dataloader, NULL, options), fAnalysisType(Types::kClassification),
204 fCorrelations(kFALSE), fROC(kTRUE)
205{
206
207 // init configurable
208 SetConfigDescription("Configuration options for Classification running");
210
211 DeclareOptionRef(fCorrelations, "Correlations", "boolean to show correlation in output");
212 DeclareOptionRef(fROC, "ROC", "boolean to show ROC in output");
213 ParseOptions();
216 gSystem->MakeDirectory(fDataLoader->GetName()); // creating directory for DataLoader output
218}
219
220//_______________________________________________________________________
222{
223 for (auto m : fIMethods) {
224 if (m != NULL)
225 delete m;
226 }
227}
228
229//_______________________________________________________________________
230/**
231 * return the options for the booked method.
232 * \param methodname name of the method.
233 * \param methodtitle method title.
234 * \return string the with options for the ml method.
235 */
237{
238 for (auto &meth : fMethods) {
239 if (meth.GetValue<TString>("MethodName") == methodname && meth.GetValue<TString>("MethodTitle") == methodtitle)
240 return meth.GetValue<TString>("MethodOptions");
241 }
242 return "";
243}
244
245//_______________________________________________________________________
246/**
247 * Method to perform Train/Test over all ml method booked.
248 * If the option Jobs > 1 can do it in parallel with MultiProc.
249 */
251{
252 fTimer.Reset();
253 fTimer.Start();
254
255 Bool_t roc = fROC;
256 fROC = kFALSE;
257 if (fJobs <= 1) {
258 Train();
259 Test();
260 } else {
261 for (auto &meth : fMethods) {
262 GetMethod(meth.GetValue<TString>("MethodName"), meth.GetValue<TString>("MethodTitle"));
263 }
264 fWorkers.SetNWorkers(fJobs);
265 auto executor = [=](UInt_t workerID) -> ClassificationResult {
270 auto methodname = fMethods[workerID].GetValue<TString>("MethodName");
271 auto methodtitle = fMethods[workerID].GetValue<TString>("MethodTitle");
272 auto meth = GetMethod(methodname, methodtitle);
273 if (!IsSilentFile()) {
274 auto fname = Form(".%s%s%s.root", fDataLoader->GetName(), methodname.Data(), methodtitle.Data());
275 auto f = new TFile(fname, "RECREATE");
276 f->mkdir(fDataLoader->GetName());
277 SetFile(f);
278 meth->SetFile(f);
279 }
280 TrainMethod(methodname, methodtitle);
281 TestMethod(methodname, methodtitle);
282 if (!IsSilentFile()) {
283 GetFile()->Close();
284 }
285 return GetResults(methodname, methodtitle);
286 };
287
288 fResults = fWorkers.Map(executor, ROOT::TSeqI(fMethods.size()));
289 if (!IsSilentFile())
290 MergeFiles();
291 }
292
293 fROC = roc;
295
296 TString hLine = "--------------------------------------------------- :";
297 Log() << kINFO << hLine << Endl;
298 Log() << kINFO << "DataSet MVA :" << Endl;
299 Log() << kINFO << "Name: Method/Title: ROC-integ :" << Endl;
300 Log() << kINFO << hLine << Endl;
301 for (auto &r : fResults) {
302
303 Log() << kINFO << Form("%-20s %-15s %#1.3f :", r.GetDataLoaderName().Data(),
304 Form("%s/%s", r.GetMethodName().Data(), r.GetMethodTitle().Data()), r.GetROCIntegral())
305 << Endl;
306 }
307 Log() << kINFO << hLine << Endl;
308
309 Log() << kINFO << "-----------------------------------------------------" << Endl;
310 Log() << kHEADER << "Evaluation done." << Endl << Endl;
311 Log() << kINFO << Form("Jobs = %d Real Time = %lf ", fJobs, fTimer.RealTime()) << Endl;
312 Log() << kINFO << "-----------------------------------------------------" << Endl;
313 Log() << kINFO << "Evaluation done." << Endl;
315}
316
317//_______________________________________________________________________
318/**
319 * Method to train all booked ml methods.
320 */
322{
323 for (auto &meth : fMethods) {
324 TrainMethod(meth.GetValue<TString>("MethodName"), meth.GetValue<TString>("MethodTitle"));
325 }
326}
327
328//_______________________________________________________________________
329/**
330 * Lets train an specific ml method.
331 * \param methodname name of the method.
332 * \param methodtitle method title.
333 */
335{
336 auto method = GetMethod(methodname, methodtitle);
337 if (!method) {
338 Log() << kFATAL
339 << Form("Trying to train method %s %s that maybe is not booked.", methodname.Data(), methodtitle.Data())
340 << Endl;
341 }
342 Log() << kHEADER << gTools().Color("bold") << Form("Training method %s %s", methodname.Data(), methodtitle.Data())
343 << gTools().Color("reset") << Endl;
344
346 if ((fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification) &&
347 method->DataInfo().GetNClasses() < 2)
348 Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
349
350 // first print some information about the default dataset
351 // if(!IsSilentFile()) WriteDataInformation(method->fDataSetInfo);
352
353 if (method->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
354 Log() << kWARNING << "Method " << method->GetMethodName() << " not trained (training tree has less entries ["
355 << method->Data()->GetNTrainingEvents() << "] than required [" << MinNoTrainingEvents << "]" << Endl;
356 return;
357 }
358
359 Log() << kHEADER << "Train method: " << method->GetMethodName() << " for Classification" << Endl << Endl;
360 method->TrainMethod();
361 Log() << kHEADER << "Training finished" << Endl << Endl;
362}
363
364//_______________________________________________________________________
365/**
366 * Lets train an specific ml method given the method type in enum TMVA::Types::EMVA
367 * \param method TMVA::Types::EMVA type.
368 * \param methodtitle method title.
369 */
371{
372 TrainMethod(Types::Instance().GetMethodName(method), methodtitle);
373}
374
375//_______________________________________________________________________
376/**
377 * Return a TMVA::MethodBase object. if method is not booked then return a null
378 * pointer.
379 * \param methodname name of the method.
380 * \param methodtitle method title.
381 * \return TMVA::MethodBase object
382 */
384{
385
386 if (!HasMethod(methodname, methodtitle)) {
387 std::cout << methodname << " " << methodtitle << std::endl;
388 Log() << kERROR << "Trying to get method not booked." << Endl;
389 return 0;
390 }
391 Int_t index = -1;
392 if (HasMethodObject(methodname, methodtitle, index)) {
393 return dynamic_cast<MethodBase *>(fIMethods[index]);
394 }
395 // if is not created then lets to create it.
396 if (GetDataLoaderDataInput().GetEntries() <=
397 1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
398 Log() << kFATAL << "No input data for the training provided!" << Endl;
399 }
400 Log() << kHEADER << "Loading booked method: " << gTools().Color("bold") << methodname << " " << methodtitle
401 << gTools().Color("reset") << Endl << Endl;
402
403 TString moptions = GetMethodOptions(methodname, methodtitle);
404
405 // interpret option string with respect to a request for boosting (i.e., BostNum > 0)
406 Int_t boostNum = 0;
407 auto conf = new TMVA::Configurable(moptions);
408 conf->DeclareOptionRef(boostNum = 0, "Boost_num", "Number of times the classifier will be boosted");
409 conf->ParseOptions();
410 delete conf;
411
412 TString fFileDir;
413 if (fModelPersistence) {
414 fFileDir = fDataLoader->GetName();
415 fFileDir += "/" + gConfig().GetIONames().fWeightFileDir;
416 }
417
418 // initialize methods
419 IMethod *im;
420 TString fJobName = GetName();
421 if (!boostNum) {
422 im = ClassifierFactory::Instance().Create(std::string(methodname.Data()), fJobName, methodtitle,
423 GetDataLoaderDataSetInfo(), moptions);
424 } else {
425 // boosted classifier, requires a specific definition, making it transparent for the user
426 Log() << kDEBUG << "Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
427 im = ClassifierFactory::Instance().Create(std::string("Boost"), fJobName, methodtitle, GetDataLoaderDataSetInfo(),
428 moptions);
429 MethodBoost *methBoost = dynamic_cast<MethodBoost *>(im);
430 if (!methBoost)
431 Log() << kFATAL << "Method with type kBoost cannot be casted to MethodCategory. /Classification" << Endl;
432
433 if (fModelPersistence)
434 methBoost->SetWeightFileDir(fFileDir);
435 methBoost->SetModelPersistence(fModelPersistence);
436 methBoost->SetBoostedMethodName(methodname);
437 methBoost->fDataSetManager = GetDataLoaderDataSetManager();
438 methBoost->SetFile(fFile.get());
439 methBoost->SetSilentFile(IsSilentFile());
440 }
441
442 MethodBase *method = dynamic_cast<MethodBase *>(im);
443 if (method == 0)
444 return 0; // could not create method
445
446 // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects)
447 if (method->GetMethodType() == Types::kCategory) {
448 MethodCategory *methCat = (dynamic_cast<MethodCategory *>(im));
449 if (!methCat)
450 Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Classification" << Endl;
451
452 if (fModelPersistence)
453 methCat->SetWeightFileDir(fFileDir);
454 methCat->SetModelPersistence(fModelPersistence);
455 methCat->fDataSetManager = GetDataLoaderDataSetManager();
456 methCat->SetFile(fFile.get());
457 methCat->SetSilentFile(IsSilentFile());
458 }
459
460 if (!method->HasAnalysisType(fAnalysisType, GetDataLoaderDataSetInfo().GetNClasses(),
461 GetDataLoaderDataSetInfo().GetNTargets())) {
462 Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling ";
463 Log() << "classification with " << GetDataLoaderDataSetInfo().GetNClasses() << " classes." << Endl;
464 return 0;
465 }
466
467 if (fModelPersistence)
468 method->SetWeightFileDir(fFileDir);
469 method->SetModelPersistence(fModelPersistence);
470 method->SetAnalysisType(fAnalysisType);
471 method->SetupMethod();
472 method->ParseOptions();
473 method->ProcessSetup();
474 method->SetFile(fFile.get());
475 method->SetSilentFile(IsSilentFile());
476
477 // check-for-unused-options is performed; may be overridden by derived classes
478 method->CheckSetup();
479 fIMethods.push_back(method);
480 return method;
481}
482
483//_______________________________________________________________________
484/**
485 * Allows to check if the TMVA::MethodBase was created and return the index in the vector.
486 * \param methodname name of the method.
487 * \param methodtitle method title.
488 * \param index refrence to Int_t with the position of the method into the vector fIMethods
489 * \return boolean true if the method was found.
490 */
492{
493 if (fIMethods.empty())
494 return kFALSE;
495 for (UInt_t i = 0; i < fIMethods.size(); i++) {
496 // they put method title like method name in MethodBase and type is type name
497 auto methbase = dynamic_cast<MethodBase *>(fIMethods[i]);
498 if (methbase->GetMethodTypeName() == methodname && methbase->GetMethodName() == methodtitle) {
499 index = i;
500 return kTRUE;
501 }
502 }
503 return kFALSE;
504}
505
506//_______________________________________________________________________
507/**
508 * Perform test evaluation in all booked methods.
509 */
511{
512 for (auto &meth : fMethods) {
513 TestMethod(meth.GetValue<TString>("MethodName"), meth.GetValue<TString>("MethodTitle"));
514 }
515}
516
517//_______________________________________________________________________
518/**
519 * Lets perform test an specific ml method.
520 * \param methodname name of the method.
521 * \param methodtitle method title.
522 */
524{
525 auto method = GetMethod(methodname, methodtitle);
526 if (!method) {
527 Log() << kFATAL
528 << Form("Trying to train method %s %s that maybe is not booked.", methodname.Data(), methodtitle.Data())
529 << Endl;
530 }
531
532 Log() << kHEADER << gTools().Color("bold") << "Test all methods" << gTools().Color("reset") << Endl;
534
535 Types::EAnalysisType analysisType = method->GetAnalysisType();
536 Log() << kHEADER << "Test method: " << method->GetMethodName() << " for Classification"
537 << " performance" << Endl << Endl;
538 method->AddOutput(Types::kTesting, analysisType);
539
540 // -----------------------------------------------------------------------
541 // First part of evaluation process
542 // --> compute efficiencies, and other separation estimators
543 // -----------------------------------------------------------------------
544
545 // although equal, we now want to separate the output for the variables
546 // and the real methods
547 Int_t isel; // will be 0 for a Method; 1 for a Variable
548 Int_t nmeth_used[2] = {0, 0}; // 0 Method; 1 Variable
549
550 std::vector<std::vector<TString>> mname(2);
551 std::vector<std::vector<Double_t>> sig(2), sep(2), roc(2);
552 std::vector<std::vector<Double_t>> eff01(2), eff10(2), eff30(2), effArea(2);
553 std::vector<std::vector<Double_t>> eff01err(2), eff10err(2), eff30err(2);
554 std::vector<std::vector<Double_t>> trainEff01(2), trainEff10(2), trainEff30(2);
555
556 method->SetFile(fFile.get());
557 method->SetSilentFile(IsSilentFile());
558
559 MethodBase *methodNoCuts = NULL;
560 if (!IsCutsMethod(method))
561 methodNoCuts = method;
562
563 Log() << kHEADER << "Evaluate classifier: " << method->GetMethodName() << Endl << Endl;
564 isel = (method->GetMethodTypeName().Contains("Variable")) ? 1 : 0;
565
566 // perform the evaluation
567 method->TestClassification();
568
569 // evaluate the classifier
570 mname[isel].push_back(method->GetMethodName());
571 sig[isel].push_back(method->GetSignificance());
572 sep[isel].push_back(method->GetSeparation());
573 roc[isel].push_back(method->GetROCIntegral());
574
575 Double_t err;
576 eff01[isel].push_back(method->GetEfficiency("Efficiency:0.01", Types::kTesting, err));
577 eff01err[isel].push_back(err);
578 eff10[isel].push_back(method->GetEfficiency("Efficiency:0.10", Types::kTesting, err));
579 eff10err[isel].push_back(err);
580 eff30[isel].push_back(method->GetEfficiency("Efficiency:0.30", Types::kTesting, err));
581 eff30err[isel].push_back(err);
582 effArea[isel].push_back(method->GetEfficiency("", Types::kTesting, err)); // computes the area (average)
583
584 trainEff01[isel].push_back(method->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer
585 trainEff10[isel].push_back(method->GetTrainingEfficiency("Efficiency:0.10"));
586 trainEff30[isel].push_back(method->GetTrainingEfficiency("Efficiency:0.30"));
587
588 nmeth_used[isel]++;
589
590 if (!IsSilentFile()) {
591 Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
592 method->WriteEvaluationHistosToFile(Types::kTesting);
593 method->WriteEvaluationHistosToFile(Types::kTraining);
594 }
595
596 // now sort the variables according to the best 'eff at Beff=0.10'
597 for (Int_t k = 0; k < 2; k++) {
598 std::vector<std::vector<Double_t>> vtemp;
599 vtemp.push_back(effArea[k]); // this is the vector that is ranked
600 vtemp.push_back(eff10[k]);
601 vtemp.push_back(eff01[k]);
602 vtemp.push_back(eff30[k]);
603 vtemp.push_back(eff10err[k]);
604 vtemp.push_back(eff01err[k]);
605 vtemp.push_back(eff30err[k]);
606 vtemp.push_back(trainEff10[k]);
607 vtemp.push_back(trainEff01[k]);
608 vtemp.push_back(trainEff30[k]);
609 vtemp.push_back(sig[k]);
610 vtemp.push_back(sep[k]);
611 vtemp.push_back(roc[k]);
612 std::vector<TString> vtemps = mname[k];
613 gTools().UsefulSortDescending(vtemp, &vtemps);
614 effArea[k] = vtemp[0];
615 eff10[k] = vtemp[1];
616 eff01[k] = vtemp[2];
617 eff30[k] = vtemp[3];
618 eff10err[k] = vtemp[4];
619 eff01err[k] = vtemp[5];
620 eff30err[k] = vtemp[6];
621 trainEff10[k] = vtemp[7];
622 trainEff01[k] = vtemp[8];
623 trainEff30[k] = vtemp[9];
624 sig[k] = vtemp[10];
625 sep[k] = vtemp[11];
626 roc[k] = vtemp[12];
627 mname[k] = vtemps;
628 }
629
630 // -----------------------------------------------------------------------
631 // Second part of evaluation process
632 // --> compute correlations among MVAs
633 // --> compute correlations between input variables and MVA (determines importance)
634 // --> count overlaps
635 // -----------------------------------------------------------------------
636 if (fCorrelations) {
637 const Int_t nmeth = methodNoCuts == NULL ? 0 : 1;
638 const Int_t nvar = method->fDataSetInfo.GetNVariables();
639 if (nmeth > 0) {
640
641 // needed for correlations
642 Double_t *dvec = new Double_t[nmeth + nvar];
643 std::vector<Double_t> rvec;
644
645 // for correlations
646 TPrincipal *tpSig = new TPrincipal(nmeth + nvar, "");
647 TPrincipal *tpBkg = new TPrincipal(nmeth + nvar, "");
648
649 // set required tree branch references
650 std::vector<TString> *theVars = new std::vector<TString>;
651 std::vector<ResultsClassification *> mvaRes;
652 theVars->push_back(methodNoCuts->GetTestvarName());
653 rvec.push_back(methodNoCuts->GetSignalReferenceCut());
654 theVars->back().ReplaceAll("MVA_", "");
655 mvaRes.push_back(dynamic_cast<ResultsClassification *>(
656 methodNoCuts->Data()->GetResults(methodNoCuts->GetMethodName(), Types::kTesting, Types::kMaxAnalysisType)));
657
658 // for overlap study
659 TMatrixD *overlapS = new TMatrixD(nmeth, nmeth);
660 TMatrixD *overlapB = new TMatrixD(nmeth, nmeth);
661 (*overlapS) *= 0; // init...
662 (*overlapB) *= 0; // init...
663
664 // loop over test tree
665 DataSet *defDs = method->fDataSetInfo.GetDataSet();
667 for (Int_t ievt = 0; ievt < defDs->GetNEvents(); ievt++) {
668 const Event *ev = defDs->GetEvent(ievt);
669
670 // for correlations
671 TMatrixD *theMat = 0;
672 for (Int_t im = 0; im < nmeth; im++) {
673 // check for NaN value
674 Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
675 if (TMath::IsNaN(retval)) {
676 Log() << kWARNING << "Found NaN return value in event: " << ievt << " for method \""
677 << methodNoCuts->GetName() << "\"" << Endl;
678 dvec[im] = 0;
679 } else
680 dvec[im] = retval;
681 }
682 for (Int_t iv = 0; iv < nvar; iv++)
683 dvec[iv + nmeth] = (Double_t)ev->GetValue(iv);
684 if (method->fDataSetInfo.IsSignal(ev)) {
685 tpSig->AddRow(dvec);
686 theMat = overlapS;
687 } else {
688 tpBkg->AddRow(dvec);
689 theMat = overlapB;
690 }
691
692 // count overlaps
693 for (Int_t im = 0; im < nmeth; im++) {
694 for (Int_t jm = im; jm < nmeth; jm++) {
695 if ((dvec[im] - rvec[im]) * (dvec[jm] - rvec[jm]) > 0) {
696 (*theMat)(im, jm)++;
697 if (im != jm)
698 (*theMat)(jm, im)++;
699 }
700 }
701 }
702 }
703
704 // renormalise overlap matrix
705 (*overlapS) *= (1.0 / defDs->GetNEvtSigTest()); // init...
706 (*overlapB) *= (1.0 / defDs->GetNEvtBkgdTest()); // init...
707
708 tpSig->MakePrincipals();
709 tpBkg->MakePrincipals();
710
711 const TMatrixD *covMatS = tpSig->GetCovarianceMatrix();
712 const TMatrixD *covMatB = tpBkg->GetCovarianceMatrix();
713
714 const TMatrixD *corrMatS = gTools().GetCorrelationMatrix(covMatS);
715 const TMatrixD *corrMatB = gTools().GetCorrelationMatrix(covMatB);
716
717 // print correlation matrices
718 if (corrMatS != 0 && corrMatB != 0) {
719
720 // extract MVA matrix
721 TMatrixD mvaMatS(nmeth, nmeth);
722 TMatrixD mvaMatB(nmeth, nmeth);
723 for (Int_t im = 0; im < nmeth; im++) {
724 for (Int_t jm = 0; jm < nmeth; jm++) {
725 mvaMatS(im, jm) = (*corrMatS)(im, jm);
726 mvaMatB(im, jm) = (*corrMatB)(im, jm);
727 }
728 }
729
730 // extract variables - to MVA matrix
731 std::vector<TString> theInputVars;
732 TMatrixD varmvaMatS(nvar, nmeth);
733 TMatrixD varmvaMatB(nvar, nmeth);
734 for (Int_t iv = 0; iv < nvar; iv++) {
735 theInputVars.push_back(method->fDataSetInfo.GetVariableInfo(iv).GetLabel());
736 for (Int_t jm = 0; jm < nmeth; jm++) {
737 varmvaMatS(iv, jm) = (*corrMatS)(nmeth + iv, jm);
738 varmvaMatB(iv, jm) = (*corrMatB)(nmeth + iv, jm);
739 }
740 }
741
742 if (nmeth > 1) {
743 Log() << kINFO << Endl;
744 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
745 << "Inter-MVA correlation matrix (signal):" << Endl;
746 gTools().FormattedOutput(mvaMatS, *theVars, Log());
747 Log() << kINFO << Endl;
748
749 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
750 << "Inter-MVA correlation matrix (background):" << Endl;
751 gTools().FormattedOutput(mvaMatB, *theVars, Log());
752 Log() << kINFO << Endl;
753 }
754
755 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
756 << "Correlations between input variables and MVA response (signal):" << Endl;
757 gTools().FormattedOutput(varmvaMatS, theInputVars, *theVars, Log());
758 Log() << kINFO << Endl;
759
760 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
761 << "Correlations between input variables and MVA response (background):" << Endl;
762 gTools().FormattedOutput(varmvaMatB, theInputVars, *theVars, Log());
763 Log() << kINFO << Endl;
764 } else
765 Log() << kWARNING << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
766 << "<TestAllMethods> cannot compute correlation matrices" << Endl;
767
768 // print overlap matrices
769 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
770 << "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
771 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
772 << "the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" << Endl;
773 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
774 << "An event is signal-like, if its MVA output exceeds the following value:" << Endl;
775 gTools().FormattedOutput(rvec, *theVars, "Method", "Cut value", Log());
776 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
777 << "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
778
779 // give notice that cut method has been excluded from this test
780 if (nmeth != 1)
781 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
782 << "Note: no correlations and overlap with cut method are provided at present" << Endl;
783
784 if (nmeth > 1) {
785 Log() << kINFO << Endl;
786 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
787 << "Inter-MVA overlap matrix (signal):" << Endl;
788 gTools().FormattedOutput(*overlapS, *theVars, Log());
789 Log() << kINFO << Endl;
790
791 Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
792 << "Inter-MVA overlap matrix (background):" << Endl;
793 gTools().FormattedOutput(*overlapB, *theVars, Log());
794 }
795
796 // cleanup
797 delete tpSig;
798 delete tpBkg;
799 delete corrMatS;
800 delete corrMatB;
801 delete theVars;
802 delete overlapS;
803 delete overlapB;
804 delete[] dvec;
805 }
806 }
807
808 // -----------------------------------------------------------------------
809 // Third part of evaluation process
810 // --> output
811 // -----------------------------------------------------------------------
812 // putting results in the classification result object
813 auto &fResult = GetResults(methodname, methodtitle);
814
815 // Binary classification
816 if (fROC) {
817 Log().EnableOutput();
819 Log() << Endl;
820 TString hLine = "------------------------------------------------------------------------------------------"
821 "-------------------------";
822 Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
823 Log() << kINFO << hLine << Endl;
824 Log() << kINFO << "DataSet MVA " << Endl;
825 Log() << kINFO << "Name: Method: ROC-integ" << Endl;
826
827 Log() << kDEBUG << hLine << Endl;
828 for (Int_t k = 0; k < 2; k++) {
829 if (k == 1 && nmeth_used[k] > 0) {
830 Log() << kINFO << hLine << Endl;
831 Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
832 }
833 for (Int_t i = 0; i < nmeth_used[k]; i++) {
834 TString datasetName = fDataLoader->GetName();
835 TString methodName = mname[k][i];
836
837 if (k == 1) {
838 methodName.ReplaceAll("Variable_", "");
839 }
840
841 TMVA::DataSet *dataset = method->Data();
842 TMVA::Results *results = dataset->GetResults(methodName, Types::kTesting, this->fAnalysisType);
843 std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
844
845 Double_t rocIntegral = 0.0;
846 if (mvaResType->size() != 0) {
847 rocIntegral = GetROCIntegral(methodname, methodtitle);
848 }
849
850 if (sep[k][i] < 0 || sig[k][i] < 0) {
851 // cannot compute separation/significance -> no MVA (usually for Cuts)
852 fResult.fROCIntegral = effArea[k][i];
853 Log() << kINFO
854 << Form("%-13s %-15s: %#1.3f", fDataLoader->GetName(), methodName.Data(), fResult.fROCIntegral)
855 << Endl;
856 } else {
857 fResult.fROCIntegral = rocIntegral;
858 Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), rocIntegral)
859 << Endl;
860 }
861 }
862 }
863 Log() << kINFO << hLine << Endl;
864 Log() << kINFO << Endl;
865 Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
866 Log() << kINFO << hLine << Endl;
867 Log() << kINFO
868 << "DataSet MVA Signal efficiency: from test sample (from training sample) "
869 << Endl;
870 Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 "
871 << Endl;
872 Log() << kINFO << hLine << Endl;
873 for (Int_t k = 0; k < 2; k++) {
874 if (k == 1 && nmeth_used[k] > 0) {
875 Log() << kINFO << hLine << Endl;
876 Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
877 }
878 for (Int_t i = 0; i < nmeth_used[k]; i++) {
879 if (k == 1)
880 mname[k][i].ReplaceAll("Variable_", "");
881
882 Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
883 method->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i],
884 trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
885 << Endl;
886 }
887 }
888 Log() << kINFO << hLine << Endl;
889 Log() << kINFO << Endl;
890
891 if (gTools().CheckForSilentOption(GetOptions()))
892 Log().InhibitOutput();
893 } else if (IsCutsMethod(method)) { // end fROC
894 for (Int_t k = 0; k < 2; k++) {
895 for (Int_t i = 0; i < nmeth_used[k]; i++) {
896
897 if (sep[k][i] < 0 || sig[k][i] < 0) {
898 // cannot compute separation/significance -> no MVA (usually for Cuts)
899 fResult.fROCIntegral = effArea[k][i];
900 }
901 }
902 }
903 }
904
905 TMVA::DataSet *dataset = method->Data();
907
908 if (IsCutsMethod(method)) {
909 fResult.fIsCuts = kTRUE;
910 } else {
911 auto rocCurveTest = GetROC(methodname, methodtitle, 0, Types::kTesting);
912 fResult.fMvaTest[0] = rocCurveTest->GetMvas();
913 fResult.fROCIntegral = GetROCIntegral(methodname, methodtitle);
914 }
915 TString className = method->DataInfo().GetClassInfo(0)->GetName();
916 fResult.fClassNames.push_back(className);
917
918 if (!IsSilentFile()) {
919 // write test/training trees
920 RootBaseDir()->cd(method->fDataSetInfo.GetName());
921 method->fDataSetInfo.GetDataSet()->GetTree(Types::kTesting)->Write("", TObject::kOverwrite);
922 method->fDataSetInfo.GetDataSet()->GetTree(Types::kTraining)->Write("", TObject::kOverwrite);
923 }
924}
925
926//_______________________________________________________________________
927/**
928 * Lets perform test an specific ml method given the method type in enum TMVA::Types::EMVA.
929 * \param method TMVA::Types::EMVA type.
930 * \param methodtitle method title.
931 */
933{
934 TestMethod(Types::Instance().GetMethodName(method), methodtitle);
935}
936
937//_______________________________________________________________________
938/**
939 * return the the vector of TMVA::Experimental::ClassificationResult objects.
940 * \return vector of results.
941 */
942std::vector<TMVA::Experimental::ClassificationResult> &TMVA::Experimental::Classification::GetResults()
943{
944 if (fResults.size() == 0)
945 Log() << kFATAL << "No Classification results available" << Endl;
946 return fResults;
947}
948
949//_______________________________________________________________________
950/**
951 * Allows to check if the ml method is a Cuts method.
952 * \return boolen true if the method is a Cuts method.
953 */
955{
956 return method->GetMethodType() == Types::kCuts ? kTRUE : kFALSE;
957}
958
959//_______________________________________________________________________
960/**
961 * Allow to get result for an specific ml method.
962 * \param methodname name of the method.
963 * \param methodtitle method title.
964 * \return TMVA::Experimental::ClassificationResult object for the method.
965 */
968{
969 for (auto &result : fResults) {
970 if (result.IsMethod(methodname, methodtitle))
971 return result;
972 }
974 result.fMethod["MethodName"] = methodname;
975 result.fMethod["MethodTitle"] = methodtitle;
976 result.fDataLoaderName = fDataLoader->GetName();
977 fResults.push_back(result);
978 return fResults.back();
979}
980
981//_______________________________________________________________________
982/**
983 * Method to get TMVA::ROCCurve Object.
984 * \param method TMVA::MethodBase object
985 * \param iClass category, default 0 then signal
986 * \param type train/test tree, default test.
987 * \return TMVA::ROCCurve object.
988 */
991{
992 TMVA::DataSet *dataset = method->Data();
993 dataset->SetCurrentType(type);
994 TMVA::Results *results = dataset->GetResults(method->GetName(), type, this->fAnalysisType);
995
996 UInt_t nClasses = method->DataInfo().GetNClasses();
997 if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
998 Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
999 iClass, nClasses)
1000 << Endl;
1001 return nullptr;
1002 }
1003
1004 TMVA::ROCCurve *rocCurve = nullptr;
1005 if (this->fAnalysisType == Types::kClassification) {
1006
1007 std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
1008 std::vector<Bool_t> *mvaResTypes = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
1009 std::vector<Float_t> mvaResWeights;
1010
1011 auto eventCollection = dataset->GetEventCollection(type);
1012 mvaResWeights.reserve(eventCollection.size());
1013 for (auto ev : eventCollection) {
1014 mvaResWeights.push_back(ev->GetWeight());
1015 }
1016
1017 rocCurve = new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
1018
1019 } else if (this->fAnalysisType == Types::kMulticlass) {
1020 std::vector<Float_t> mvaRes;
1021 std::vector<Bool_t> mvaResTypes;
1022 std::vector<Float_t> mvaResWeights;
1023
1024 std::vector<std::vector<Float_t>> *rawMvaRes = dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();
1025
1026 // Vector transpose due to values being stored as
1027 // [ [0, 1, 2], [0, 1, 2], ... ]
1028 // in ResultsMulticlass::GetValueVector.
1029 mvaRes.reserve(rawMvaRes->size());
1030 for (auto item : *rawMvaRes) {
1031 mvaRes.push_back(item[iClass]);
1032 }
1033
1034 auto eventCollection = dataset->GetEventCollection(type);
1035 mvaResTypes.reserve(eventCollection.size());
1036 mvaResWeights.reserve(eventCollection.size());
1037 for (auto ev : eventCollection) {
1038 mvaResTypes.push_back(ev->GetClass() == iClass);
1039 mvaResWeights.push_back(ev->GetWeight());
1040 }
1041
1042 rocCurve = new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
1043 }
1044
1045 return rocCurve;
1046}
1047
1048//_______________________________________________________________________
1049/**
1050 * Method to get TMVA::ROCCurve Object.
1051 * \param methodname ml method name.
1052 * \param methodtitle ml method title.
1053 * \param iClass category, default 0 then signal
1054 * \param type train/test tree, default test.
1055 * \return TMVA::ROCCurve object.
1056 */
1059{
1060 return GetROC(GetMethod(methodname, methodtitle), iClass, type);
1061}
1062
1063//_______________________________________________________________________
1064/**
1065 * Method to get ROC-Integral value from mvas.
1066 * \param methodname ml method name.
1067 * \param methodtitle ml method title.
1068 * \param iClass category, default 0 then signal
1069 * \return Double_t with the ROC-Integral value.
1070 */
1072{
1073 TMVA::ROCCurve *rocCurve = GetROC(methodname, methodtitle, iClass);
1074 if (!rocCurve) {
1075 Log() << kFATAL
1076 << Form("ROCCurve object was not created in MethodName = %s MethodTitle = %s not found with Dataset = %s ",
1077 methodname.Data(), methodtitle.Data(), fDataLoader->GetName())
1078 << Endl;
1079 return 0;
1080 }
1081
1083 Double_t rocIntegral = rocCurve->GetROCIntegral(npoints);
1084 delete rocCurve;
1085
1086 return rocIntegral;
1087}
1088
1089//_______________________________________________________________________
1091{
1092 TFile *savdir = file;
1093 TDirectory *adir = savdir;
1094 adir->cd();
1095 // loop on all entries of this directory
1096 TKey *key;
1097 TIter nextkey(src->GetListOfKeys());
1098 while ((key = (TKey *)nextkey())) {
1099 const Char_t *classname = key->GetClassName();
1100 TClass *cl = gROOT->GetClass(classname);
1101 if (!cl)
1102 continue;
1103 if (cl->InheritsFrom(TDirectory::Class())) {
1104 src->cd(key->GetName());
1105 TDirectory *subdir = file;
1106 adir->cd();
1107 CopyFrom(subdir, file);
1108 adir->cd();
1109 } else if (cl->InheritsFrom(TTree::Class())) {
1110 TTree *T = (TTree *)src->Get(key->GetName());
1111 adir->cd();
1112 TTree *newT = T->CloneTree(-1, "fast");
1113 newT->Write();
1114 } else {
1115 src->cd();
1116 TObject *obj = key->ReadObj();
1117 adir->cd();
1118 obj->Write();
1119 delete obj;
1120 }
1121 }
1122 adir->SaveSelf(kTRUE);
1123 savdir->cd();
1124}
1125
1126//_______________________________________________________________________
1128{
1129
1130 auto dsdir = fFile->mkdir(fDataLoader->GetName()); // dataset dir
1131 TTree *TrainTree = 0;
1132 TTree *TestTree = 0;
1133 TFile *ifile = 0;
1134 TFile *ofile = 0;
1135 for (UInt_t i = 0; i < fMethods.size(); i++) {
1136 auto methodname = fMethods[i].GetValue<TString>("MethodName");
1137 auto methodtitle = fMethods[i].GetValue<TString>("MethodTitle");
1138 auto fname = Form(".%s%s%s.root", fDataLoader->GetName(), methodname.Data(), methodtitle.Data());
1139 TDirectoryFile *ds = 0;
1140 if (i == 0) {
1141 ifile = new TFile(fname);
1142 ds = (TDirectoryFile *)ifile->Get(fDataLoader->GetName());
1143 } else {
1144 ofile = new TFile(fname);
1145 ds = (TDirectoryFile *)ofile->Get(fDataLoader->GetName());
1146 }
1147 auto tmptrain = (TTree *)ds->Get("TrainTree");
1148 auto tmptest = (TTree *)ds->Get("TestTree");
1149 fFile->cd();
1150 fFile->cd(fDataLoader->GetName());
1151
1152 auto methdirname = Form("Method_%s", methodtitle.Data());
1153 auto methdir = dsdir->mkdir(methdirname, methdirname);
1154 auto methdirbase = methdir->mkdir(methodtitle.Data(), methodtitle.Data());
1155 auto mfdir = (TDirectoryFile *)ds->Get(methdirname);
1156 auto mfdirbase = (TDirectoryFile *)mfdir->Get(methodtitle.Data());
1157
1158 CopyFrom(mfdirbase, (TFile *)methdirbase);
1159 dsdir->cd();
1160 if (i == 0) {
1161 TrainTree = tmptrain->CopyTree("");
1162 TestTree = tmptest->CopyTree("");
1163 } else {
1164 Float_t mva = 0;
1165 auto trainbranch = TrainTree->Branch(methodtitle.Data(), &mva);
1166 tmptrain->SetBranchAddress(methodtitle.Data(), &mva);
1167 auto entries = tmptrain->GetEntries();
1168 for (UInt_t ev = 0; ev < entries; ev++) {
1169 tmptrain->GetEntry(ev);
1170 trainbranch->Fill();
1171 }
1172 auto testbranch = TestTree->Branch(methodtitle.Data(), &mva);
1173 tmptest->SetBranchAddress(methodtitle.Data(), &mva);
1174 entries = tmptest->GetEntries();
1175 for (UInt_t ev = 0; ev < entries; ev++) {
1176 tmptest->GetEntry(ev);
1177 testbranch->Fill();
1178 }
1179 ofile->Close();
1180 }
1181 }
1182 TrainTree->Write();
1183 TestTree->Write();
1184 ifile->Close();
1185 // cleaning
1186 for (UInt_t i = 0; i < fMethods.size(); i++) {
1187 auto methodname = fMethods[i].GetValue<TString>("MethodName");
1188 auto methodtitle = fMethods[i].GetValue<TString>("MethodTitle");
1189 auto fname = Form(".%s%s%s.root", fDataLoader->GetName(), methodname.Data(), methodtitle.Data());
1190 gSystem->Unlink(fname);
1191 }
1192}
void Class()
Definition: Class.C:29
#define MinNoTrainingEvents
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
char Char_t
Definition: RtypesCore.h:29
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
float Float_t
Definition: RtypesCore.h:53
const Bool_t kTRUE
Definition: RtypesCore.h:87
int type
Definition: TGX11.cxx:120
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
#define gROOT
Definition: TROOT.h:414
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
Definition: TSystem.h:560
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition: TClass.h:75
Bool_t InheritsFrom(const char *cl) const
Return kTRUE if this class inherits from a class with name "classname".
Definition: TClass.cxx:4737
A ROOT file is structured in Directories (like a file system).
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Describe directory structure in memory.
Definition: TDirectory.h:34
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
Definition: TDirectory.cxx:805
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:497
virtual void SaveSelf(Bool_t=kFALSE)
Definition: TDirectory.h:186
virtual TList * GetListOfKeys() const
Definition: TDirectory.h:155
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:48
virtual void Close(Option_t *option="")
Close a file.
Definition: TFile.cxx:914
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
virtual void SetName(const char *name="")
Set graph name.
Definition: TGraph.cxx:2221
virtual void SetTitle(const char *title="")
Change (i.e.
Definition: TGraph.cxx:2237
TAxis * GetXaxis() const
Get x axis of the graph.
Definition: TGraph.cxx:1592
TAxis * GetYaxis() const
Get y axis of the graph.
Definition: TGraph.cxx:1602
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
virtual const char * GetClassName() const
Definition: TKey.h:71
virtual TObject * ReadObj()
To read a TObject* from the file.
Definition: TKey.cxx:722
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
TString fWeightFileDir
Definition: Config.h:122
void SetDrawProgressBar(Bool_t d)
Definition: Config.h:71
void SetUseColor(Bool_t uc)
Definition: Config.h:62
class TMVA::Config::VariablePlotting fVariablePlotting
void SetSilent(Bool_t s)
Definition: Config.h:65
IONames & GetIONames()
Definition: Config.h:100
void SetConfigDescription(const char *d)
Definition: Configurable.h:64
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void SetConfigName(const char *n)
Definition: Configurable.h:63
virtual void ParseOptions()
options parser
void CheckForUnusedOptions() const
checks for unused options in option string
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
Class that contains all the data information.
Definition: DataSet.h:69
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:427
const Event * GetEvent() const
Definition: DataSet.cxx:202
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:227
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:435
Abstract base class for all high level ml algorithms, you can book ml methods like BDT,...
Definition: Envelope.h:44
Bool_t fModelPersistence
file to save the results
Definition: Envelope.h:49
std::shared_ptr< DataLoader > fDataLoader
Booked method information.
Definition: Envelope.h:47
virtual void ParseOptions()
Method to parse the internal option string.
Definition: Envelope.cxx:187
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:392
Double_t GetROCIntegral(UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
Method to get ROC-Integral value from mvas.
TGraph * GetROCGraph(UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
Method to get TGraph object with the ROC curve.
void Show()
Method to print the results in stdout.
Bool_t IsMethod(TString methodname, TString methodtitle)
Method to check if method was booked.
std::map< UInt_t, std::vector< std::tuple< Float_t, Float_t, Bool_t > > > fMvaTest
ROCCurve * GetROC(UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
Method to get TMVA::ROCCurve Object.
ClassificationResult & operator=(const ClassificationResult &r)
std::map< UInt_t, std::vector< std::tuple< Float_t, Float_t, Bool_t > > > fMvaTrain
Classification(DataLoader *loader, TFile *file, TString options)
Contructor to create a two class classifier.
Double_t GetROCIntegral(TString methodname, TString methodtitle, UInt_t iClass=0)
Method to get ROC-Integral value from mvas.
virtual void Test()
Perform test evaluation in all booked methods.
TString GetMethodOptions(TString methodname, TString methodtitle)
return the options for the booked method.
MethodBase * GetMethod(TString methodname, TString methodtitle)
Return a TMVA::MethodBase object.
virtual void TrainMethod(TString methodname, TString methodtitle)
Lets train an specific ml method.
Bool_t HasMethodObject(TString methodname, TString methodtitle, Int_t &index)
Allows to check if the TMVA::MethodBase was created and return the index in the vector.
std::vector< ClassificationResult > & GetResults()
return the the vector of TMVA::Experimental::ClassificationResult objects.
virtual void Train()
Method to train all booked ml methods.
virtual void Evaluate()
Method to perform Train/Test over all ml method booked.
Types::EAnalysisType fAnalysisType
vector of objects with booked methods
TMVA::ROCCurve * GetROC(TMVA::MethodBase *method, UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
Method to get TMVA::ROCCurve Object.
Bool_t IsCutsMethod(TMVA::MethodBase *method)
Allows to check if the ml method is a Cuts method.
void CopyFrom(TDirectory *src, TFile *file)
virtual void TestMethod(TString methodname, TString methodtitle)
Lets perform test an specific ml method.
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:369
void SetWeightFileDir(TString fileDir)
set directory of weight file
TString GetMethodTypeName() const
Definition: MethodBase.h:323
const char * GetName() const
Definition: MethodBase.h:325
const TString & GetTestvarName() const
Definition: MethodBase.h:326
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:411
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:427
const TString & GetMethodName() const
Definition: MethodBase.h:322
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:428
DataSetInfo & DataInfo() const
Definition: MethodBase.h:401
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:596
Types::EMVA GetMethodType() const
Definition: MethodBase.h:324
void SetFile(TFile *file)
Definition: MethodBase.h:366
DataSet * Data() const
Definition: MethodBase.h:400
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:373
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:351
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:438
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:86
DataSetManager * fDataSetManager
Definition: MethodBoost.h:193
Class for categorizing the phase space.
DataSetManager * fDataSetManager
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
static void InhibitOutput()
Definition: MsgLogger.cxx:74
static void EnableOutput()
Definition: MsgLogger.cxx:75
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
Definition: ROCCurve.cxx:251
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Definition: Results.h:57
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:899
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:576
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:336
Singleton class for Global types used by TMVA.
Definition: Types.h:73
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:70
@ kCategory
Definition: Types.h:99
@ kCuts
Definition: Types.h:80
EAnalysisType
Definition: Types.h:127
@ kMulticlass
Definition: Types.h:130
@ kClassification
Definition: Types.h:128
@ kMaxAnalysisType
Definition: Types.h:132
@ kTraining
Definition: Types.h:144
@ kTesting
Definition: Types.h:145
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Mother of all ROOT objects.
Definition: TObject.h:37
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
@ kOverwrite
overwrite existing object with same name
Definition: TObject.h:88
Principal Components Analysis (PCA)
Definition: TPrincipal.h:20
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Definition: TPrincipal.cxx:410
const TMatrixD * GetCovarianceMatrix() const
Definition: TPrincipal.h:58
virtual void MakePrincipals()
Perform the principal components analysis.
Definition: TPrincipal.cxx:869
Basic string class.
Definition: TString.h:131
const char * Data() const
Definition: TString.h:364
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:834
virtual int Unlink(const char *name)
Unlink, i.e.
Definition: TSystem.cxx:1371
A TTree represents a columnar dataset.
Definition: TTree.h:71
virtual TTree * CopyTree(const char *selection, Option_t *option="", Long64_t nentries=kMaxEntries, Long64_t firstentry=0)
Copy a tree with selection.
Definition: TTree.cxx:3567
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1741
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9370
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:753
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:146
TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
Definition: Cppyy.cxx:747
double T(double x)
Definition: ChebyshevPol.h:34
void GetMethodTitle(TString &name, TKey *ikey)
Definition: tmvaglob.cxx:341
create variable transformations
Config & gConfig()
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Bool_t IsNaN(Double_t x)
Definition: TMath.h:880
Double_t Log(Double_t x)
Definition: TMath.h:748
Definition: file.py:1
auto * m
Definition: textangle.C:8