Logo ROOT   6.10/09
Reference Guide
CrossValidation.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Omar Zapata, Thomas James Stevenson.
3 
4 #include "TMVA/CrossValidation.h"
5 
6 #include "TMVA/Config.h"
7 #include "TMVA/DataSet.h"
8 #include "TMVA/Event.h"
9 #include "TMVA/MethodBase.h"
10 #include "TMVA/MsgLogger.h"
12 #include "TMVA/tmvaglob.h"
13 #include "TMVA/Types.h"
14 
15 #include "TSystem.h"
16 #include "TAxis.h"
17 #include "TCanvas.h"
18 #include "TGraph.h"
19 #include "TMath.h"
20 
21 #include <iostream>
22 #include <memory>
23 
24 /*! \class TMVA::CrossValidationResult
25 \ingroup TMVA
26 */
27 
28 /*! \class TMVA::CrossValidation
29 \ingroup TMVA
30 */
31 
33 {
// NOTE(review): the signature line (and any member-initializer list) is absent
// from this listing. This empty body presumably belongs to the
// CrossValidationResult default constructor - confirm against the header.
34 }
35 
37 {
38  fROCs=obj.fROCs;
39  fROCCurves = obj.fROCCurves;
40 }
41 
43 {
44  return fROCCurves.get();
45 }
46 
48 {
49  Float_t avg=0;
50  for(auto &roc:fROCs) avg+=roc.second;
51  return avg/fROCs.size();
52 }
53 
55 {
56  // NOTE: We are using here the unbiased estimation of the standard deviation.
57  Float_t std=0;
58  Float_t avg=GetROCAverage();
59  for(auto &roc:fROCs) std+=TMath::Power(roc.second-avg, 2);
60  return TMath::Sqrt(std/float(fROCs.size()-1.0));
61 }
62 
64 {
// Logs a summary of the cross-validation results: one line per entry of
// fROCs, then the average and the standard deviation of the ROC integrals.
// NOTE(review): the embedded numbering jumps 64 -> 67 and 76 -> 78, so some
// statements of this body are absent from this listing.
67 
// Local logger tagged "CrossValidation"; everything below goes through it.
68  MsgLogger fLogger("CrossValidation");
69  fLogger << kHEADER << " ==== Results ====" << Endl;
// item.first is the fold key of the fROCs map, item.second its ROC integral.
70  for(auto &item:fROCs)
// NOTE(review): this line ends with std::endl while every other line uses
// Endl - confirm that the difference is intended.
71  fLogger << kINFO << Form("Fold %i ROC-Int : %.4f",item.first,item.second) << std::endl;
72 
73  fLogger << kINFO << "------------------------" << Endl;
74  fLogger << kINFO << Form("Average ROC-Int : %.4f",GetROCAverage()) << Endl;
75  fLogger << kINFO << Form("Std-Dev ROC-Int : %.4f",GetROCStandardDeviation()) << Endl;
76 
78 }
79 
81 {
82  TCanvas *c=new TCanvas(name.Data());
83  fROCCurves->Draw("AL");
84  fROCCurves->GetXaxis()->SetTitle(" Signal Efficiency ");
85  fROCCurves->GetYaxis()->SetTitle(" Background Rejection ");
86  Float_t adjust=1+fROCs.size()*0.01;
87  c->BuildLegend(0.15,0.15,0.4*adjust,0.5*adjust);
88  c->SetTitle("Cross Validation ROC Curves");
89  c->Draw();
90  return c;
91 }
92 
// Tail of a constructor's member-initializer list: the fold count defaults to
// 5 and fClassifier takes a newly allocated TMVA::Factory named
// "CrossValidation", configured by the option string below.
// NOTE(review): the line that opens this constructor and embedded line 96
// inside the body are absent from this listing.
94 fNumFolds(5),fClassifier(new TMVA::Factory("CrossValidation","!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification"))
95 {
97 }
98 
100 {
101  fClassifier=nullptr;
102 }
103 
105 {
// Stores the requested number of folds and immediately asks the data loader
// to build a k-fold split with that count.
106  fNumFolds=i;
107  fDataLoader->MakeKFoldDataSet(fNumFolds);
// NOTE(review): the embedded numbering jumps 107 -> 109; one statement of
// this body is absent from this listing.
109 }
110 
112 {
// Runs the booked method once per fold and appends each fold's ROC integral,
// ROC curve, significance, separation and efficiencies to fResults.
// NOTE(review): the embedded numbering has gaps (118-119, 121, 126, 141, 145,
// 171-172, 177, 179), so several statements of this body are absent from this
// listing. Do not treat what is shown here as the complete sequence.
//
// The method to run is described by three string entries of fMethod.
113  TString methodName = fMethod.GetValue<TString>("MethodName");
114  TString methodTitle = fMethod.GetValue<TString>("MethodTitle");
115  TString methodOptions = fMethod.GetValue<TString>("MethodOptions");
// An empty method name is reported at kFATAL level.
116  if(methodName == "") Log() << kFATAL << "No method booked for cross-validation" << Endl;
117 
120  Log() << kINFO << "Evaluate method: " << methodTitle << Endl;
122 
123  // Generate K folds on given dataset
// Only done when fFoldStatus is false.
124  if(!fFoldStatus){
125  fDataLoader->MakeKFoldDataSet(fNumFolds);
127  }
128 
129  // Process K folds
130  for(UInt_t i=0; i<fNumFolds; ++i){
131  Log() << kDEBUG << "Fold (" << methodTitle << "): " << i << Endl;
132  // Get specific fold of dataset and setup method
// foldTitle is "<methodTitle>_fold<i+1>"; in the visible code it is only used
// as the title of this fold's ROC graph. The method itself is booked under
// the plain methodTitle for every fold.
133  TString foldTitle = methodTitle;
134  foldTitle += "_fold";
135  foldTitle += i+1;
136 
137  fDataLoader->PrepareFoldDataSet(i, TMVA::Types::kTesting);
138  MethodBase* smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions);
139 
140  // Train method
142  smethod->TrainMethod();
143 
144  // Test method
146  smethod->AddOutput(Types::kTesting, smethod->GetAnalysisType());
147  smethod->TestClassification();
148 
149  // Store results
// ROC integrals are keyed by the zero-based fold index.
150  fResults.fROCs[i] = fClassifier->GetROCIntegral(fDataLoader->GetName(),methodTitle);
151 
// Each fold's curve gets line colour i+1, width 2 and the fold title, and is
// then added to the shared multigraph.
152  TGraph* gr = fClassifier->GetROCCurve(fDataLoader->GetName(), methodTitle, true);
153  gr->SetLineColor(i+1);
154  gr->SetLineWidth(2);
155  gr->SetTitle(foldTitle.Data());
156  fResults.fROCCurves->Add(gr);
157 
158  fResults.fSigs.push_back(smethod->GetSignificance());
159  fResults.fSeps.push_back(smethod->GetSeparation());
160 
// err is passed to GetEfficiency by reference and is not read afterwards here.
161  Double_t err;
162  fResults.fEff01s.push_back(smethod->GetEfficiency("Efficiency:0.01",Types::kTesting, err));
163  fResults.fEff10s.push_back(smethod->GetEfficiency("Efficiency:0.10",Types::kTesting,err));
164  fResults.fEff30s.push_back(smethod->GetEfficiency("Efficiency:0.30",Types::kTesting,err));
165  fResults.fEffAreas.push_back(smethod->GetEfficiency("" ,Types::kTesting,err));
166  fResults.fTrainEff01s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.01"));
167  fResults.fTrainEff10s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.10"));
168  fResults.fTrainEff30s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.30"));
169 
170  // Clean-up for this fold
// The factory's methods and its method map are cleared before the next
// iteration books the method again.
173  fClassifier->DeleteAllMethods();
174  fClassifier->fMethodsMap.clear();
175  }
176 
178  Log() << kINFO << "Evaluation done." << Endl;
180 }
181 
// Returns fResults; if no ROC values have been stored yet, a kFATAL message
// is logged first.
// NOTE(review): the line carrying the signature and the opening brace is
// absent from this listing.
183  if(fResults.fROCs.size()==0) Log() << kFATAL << "No cross-validation results available" << Endl;
184  return fResults;
185 }
std::vector< Double_t > fSigs
virtual void SetLineWidth(Width_t lwidth)
Set the line width.
Definition: TAttLine.h:43
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
float Float_t
Definition: RtypesCore.h:53
void SetTitle(const char *title="")
Set canvas title.
Definition: TCanvas.cxx:1942
const CrossValidationResult & GetResults() const
T GetValue(const TString &key)
Definition: OptionMap.h:144
CrossValidationResult fResults
Config & gConfig()
MsgLogger & Log() const
Definition: Configurable.h:122
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:35
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
Basic string class.
Definition: TString.h:129
bool Bool_t
Definition: RtypesCore.h:59
std::unique_ptr< Factory > fClassifier
virtual void SetTitle(const char *title="")
Set graph title.
Definition: TGraph.cxx:2180
STL namespace.
std::vector< Double_t > fEff10s
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:628
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight should be ignored in the training, or not.
Definition: Event.cxx:392
OptionMap fMethod
Definition: Envelope.h:38
DataSet * Data() const
Definition: MethodBase.h:393
void SetNumFolds(UInt_t i)
Base class for all machine learning algorithms.
Definition: Envelope.h:35
std::vector< Double_t > fTrainEff01s
std::vector< Double_t > fTrainEff10s
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
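fill background efficiency (resp. rejection (1-eff)) versus signal efficiency plots; returns signal efficiency at background efficiency indicated in theString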
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:40
std::vector< Double_t > fEff01s
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Definition: DataSet.cxx:316
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
Float_t GetROCStandardDeviation() const
const TString & GetMethodName() const
Definition: MethodBase.h:315
This is the main MVA steering class.
Definition: Factory.h:81
std::vector< Double_t > fTrainEff30s
virtual Double_t GetSignificance() const
compute significance of mean difference
TGraphErrors * gr
Definition: legend1.C:25
CrossValidation(DataLoader *loader)
virtual void Evaluate()
Virtual method to be implemented with your algorithm.
const Bool_t kFALSE
Definition: RtypesCore.h:92
The Canvas class.
Definition: TCanvas.h:31
double Double_t
Definition: RtypesCore.h:55
virtual Double_t GetSeparation(TH1 *, TH1 *) const
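compute "separation" defined as \( \langle S^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)} \, dx \)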
std::shared_ptr< DataLoader > fDataLoader
Definition: Envelope.h:39
TMultiGraph * GetROCCurves(Bool_t fLegend=kTRUE)
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
std::vector< Double_t > fEffAreas
virtual void Draw(Option_t *option="")
Draw a canvas.
Definition: TCanvas.cxx:817
std::vector< Double_t > fSeps
Abstract ClassifierFactory template that handles arbitrary types.
virtual TLegend * BuildLegend(Double_t x1=0.3, Double_t y1=0.21, Double_t x2=0.3, Double_t y2=0.21, const char *title="", Option_t *option="")
Build a legend from the graphical objects in the pad.
Definition: TPad.cxx:465
void SetSilent(Bool_t s)
Definition: Config.h:60
std::map< UInt_t, Float_t > fROCs
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
TCanvas * Draw(const TString name="CrossValidation") const
std::vector< Double_t > fEff30s
virtual Double_t GetTrainingEfficiency(const TString &)
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:421
Double_t Sqrt(Double_t x)
Definition: TMath.h:591
static void EnableOutput()
Definition: MsgLogger.cxx:75
const Bool_t kTRUE
Definition: RtypesCore.h:91
virtual void TestClassification()
initialization
std::shared_ptr< TMultiGraph > fROCCurves
const char * Data() const
Definition: TString.h:347