Logo ROOT   6.08/07
Reference Guide
CrossValidation.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Omar Zapata, Thomas James Stevenson.
3 
4 #include "TMVA/CrossValidation.h"
5 
6 #include "TMVA/Config.h"
7 #include "TMVA/DataSet.h"
8 #include "TMVA/Event.h"
9 #include "TMVA/MethodBase.h"
10 #include "TMVA/MsgLogger.h"
12 #include "TMVA/tmvaglob.h"
13 #include "TMVA/Types.h"
14 
15 #include "TSystem.h"
16 #include "TAxis.h"
17 #include "TCanvas.h"
18 #include "TGraph.h"
19 #include "TMath.h"
20 
21 #include <iostream>
22 #include <memory>
23 
// Empty body: nothing is done here beyond whatever the (not shown) signature /
// initialiser list on listing line 24 performs.
// NOTE(review): presumably the CrossValidationResult default constructor —
// the signature line is missing from this extract, confirm against the header.
25 {
26 }
27 
// Copies the state of `obj` into this object:
//  - fROCs (fold index -> ROC integral map) is copied by value;
//  - fROCCurves is a std::shared_ptr<TMultiGraph>, so the assignment makes this
//    object share the same multigraph as `obj` instead of duplicating it.
// NOTE(review): the signature (listing line 28) is missing from this extract;
// presumably this is the copy constructor. The other result vectors
// (fSigs, fSeps, fEff*, fTrainEff*) are not assigned in the visible lines.
29 {
30  fROCs=obj.fROCs;
31  fROCCurves = obj.fROCCurves;
32 }
33 
// Returns the raw pointer held by the fROCCurves shared_ptr. The shared_ptr
// keeps ownership, so the returned TMultiGraph must not be deleted by the caller.
// NOTE(review): the signature (listing line 34) is missing from this extract;
// no parameter is used in the visible body.
35 {
36  return fROCCurves.get();
37 }
38 
// Returns the arithmetic mean of the ROC integrals stored in fROCs
// (sum of the mapped values divided by the number of entries).
// NOTE(review): when fROCs is empty this evaluates 0/0 in floating point;
// there is no guard in the visible code. The signature (listing line 39) is
// missing from this extract; presumably GetROCAverage().
40 {
41  Float_t avg=0;
42  for(auto &roc:fROCs) avg+=roc.second;
43  return avg/fROCs.size();
44 }
45 
// Returns the sample standard deviation of the ROC integrals in fROCs:
// the squared deviations from GetROCAverage() are summed and divided by
// (number of folds - 1) before taking the square root.
// NOTE(review): with exactly one entry the divisor is 0 and the sum is 0,
// i.e. 0/0; with no entries GetROCAverage() itself divides by zero. Neither
// case is guarded here. The signature (listing line 46) is missing from this
// extract.
47 {
48  // NOTE: We are using here the unbiased estimation of the standard deviation.
49  Float_t std=0;
50  Float_t avg=GetROCAverage();
51  for(auto &roc:fROCs) std+=TMath::Power(roc.second-avg, 2);
52  return TMath::Sqrt(std/float(fROCs.size()-1.0));
53 }
54 
// Logs a summary of the cross-validation results through a local MsgLogger
// named "CrossValidation": one line per fold with its ROC integral, then a
// separator, the average and the standard deviation of the ROC integrals.
// NOTE(review): the signature (listing line 55) and listing lines 57-58 and 69
// are missing from this extract; only the visible statements are described.
56 {
59 
60  MsgLogger fLogger("CrossValidation");
61  fLogger << kHEADER << " ==== Results ====" << Endl;
// Each fold line is terminated with the MsgLogger manipulator Endl, like every
// other message in this block, so it is dispatched as its own log message
// (std::endl is a plain ostream manipulator, not the logger's terminator).
62  for(auto &item:fROCs)
63  fLogger << kINFO << Form("Fold %i ROC-Int : %.4f",item.first,item.second) << Endl;
64 
65  fLogger << kINFO << "------------------------" << Endl;
66  fLogger << kINFO << Form("Average ROC-Int : %.4f",GetROCAverage()) << Endl;
67  fLogger << kINFO << Form("Std-Dev ROC-Int : %.4f",GetROCStandardDeviation()) << Endl;
68 
70 }
71 
// Draws the collected per-fold ROC curves on a newly created canvas and
// returns that canvas.
//  - `name` is used as the canvas name.
//  - The multigraph is drawn with option "AL", its axes are titled, and a
//    legend is built whose upper-right corner grows by 1% per stored fold.
// The canvas is allocated with `new` and is not deleted in this function.
// NOTE(review): the signature (listing line 72) is missing from this extract.
73 {
74  TCanvas *c=new TCanvas(name.Data());
75  fROCCurves->Draw("AL");
76  fROCCurves->GetXaxis()->SetTitle(" Signal Efficiency ");
77  fROCCurves->GetYaxis()->SetTitle(" Background Rejection ");
// Scale factor for the legend box: 1 + 0.01 * (number of folds).
78  Float_t adjust=1+fROCs.size()*0.01;
79  c->BuildLegend(0.15,0.15,0.4*adjust,0.5*adjust);
80  c->SetTitle("Cross Validation ROC Curves");
81  c->Draw();
82  return c;
83 }
84 
// Member initialisers of the CrossValidation constructor: the fold count
// defaults to 5 and fClassifier takes ownership of a new TMVA::Factory named
// "CrossValidation", configured through the option string below.
// NOTE(review): the constructor signature (listing line 85) and listing line 88
// inside the body are missing from this extract.
86 fNumFolds(5),fClassifier(new TMVA::Factory("CrossValidation","!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification"))
87 {
89 }
90 
// Resets fClassifier to null. fClassifier is a std::unique_ptr<Factory>, so
// this assignment destroys the owned Factory.
// NOTE(review): the signature (listing line 91) is missing from this extract;
// presumably the destructor.
92 {
93  fClassifier=nullptr;
94 }
95 
// Stores the requested number of folds `i` and immediately asks the data
// loader to build the k-fold split with that count.
// NOTE(review): the signature (listing line 96) and listing line 100 are
// missing from this extract.
97 {
98  fNumFolds=i;
99  fDataLoader->MakeKFoldDataSet(fNumFolds);
101 }
102 
// Runs the booked method once per fold and stores the per-fold results in
// fResults (ROC integral, ROC curve, significance, separation, test and
// training efficiencies).
//
// Visible steps:
//  1. Read method name / title / options from the fMethod option map; an empty
//     name is reported at kFATAL level.
//  2. Build the k-fold split if fFoldStatus is not set.
//  3. For each fold i in [0, fNumFolds): prepare the fold as the testing set,
//     book, train and test the method, record the results, then remove the
//     method from the factory so the next fold starts clean.
//
// NOTE(review): the signature (listing line 103) and listing lines 110-111,
// 113, 118, 133, 137, 163-164, 169 and 171 are missing from this extract, so
// statements between the visible ones may exist in the real file.
104 {
105  TString methodName = fMethod.GetValue<TString>("MethodName");
106  TString methodTitle = fMethod.GetValue<TString>("MethodTitle");
107  TString methodOptions = fMethod.GetValue<TString>("MethodOptions");
108  if(methodName == "") Log() << kFATAL << "No method booked for cross-validation" << Endl;
109 
112  Log() << kINFO << "Evaluate method: " << methodTitle << Endl;
114 
115  // Generate K folds on given dataset
116  if(!fFoldStatus){
117  fDataLoader->MakeKFoldDataSet(fNumFolds);
119  }
120 
121  // Process K folds
122  for(UInt_t i=0; i<fNumFolds; ++i){
123  Log() << kDEBUG << "Fold (" << methodTitle << "): " << i << Endl;
124  // Get specific fold of dataset and setup method
// foldTitle is "<methodTitle>_fold<i+1>"; in the visible code it is only used
// as the title of this fold's ROC graph, the method itself is booked under
// methodTitle.
125  TString foldTitle = methodTitle;
126  foldTitle += "_fold";
127  foldTitle += i+1;
128 
129  fDataLoader->PrepareFoldDataSet(i, TMVA::Types::kTesting);
130  MethodBase* smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions);
131 
132  // Train method
134  smethod->TrainMethod();
135 
136  // Test method
138  smethod->AddOutput(Types::kTesting, smethod->GetAnalysisType());
139  smethod->TestClassification();
140 
141  // Store results
// ROC integral is keyed by the zero-based fold index.
142  fResults.fROCs[i] = fClassifier->GetROCIntegral(fDataLoader->GetName(),methodTitle);
143 
// One ROC graph per fold, coloured by fold number and added to the shared
// multigraph.
144  TGraph* gr = fClassifier->GetROCCurve(fDataLoader->GetName(), methodTitle, true);
145  gr->SetLineColor(i+1);
146  gr->SetLineWidth(2);
147  gr->SetTitle(foldTitle.Data());
148  fResults.fROCCurves->Add(gr);
149 
150  fResults.fSigs.push_back(smethod->GetSignificance());
151  fResults.fSeps.push_back(smethod->GetSeparation());
152 
// `err` is an output argument of GetEfficiency; its value is not used here.
153  Double_t err;
154  fResults.fEff01s.push_back(smethod->GetEfficiency("Efficiency:0.01",Types::kTesting, err));
155  fResults.fEff10s.push_back(smethod->GetEfficiency("Efficiency:0.10",Types::kTesting,err));
156  fResults.fEff30s.push_back(smethod->GetEfficiency("Efficiency:0.30",Types::kTesting,err));
157  fResults.fEffAreas.push_back(smethod->GetEfficiency("" ,Types::kTesting,err));
158  fResults.fTrainEff01s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.01"));
159  fResults.fTrainEff10s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.10"));
160  fResults.fTrainEff30s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.30"));
161 
162  // Clean-up for this fold
165  fClassifier->DeleteAllMethods();
166  fClassifier->fMethodsMap.clear();
167  }
168 
170  Log() << kINFO << "Evaluation done." << Endl;
172 }
173 
// Returns the accumulated cross-validation results. If no ROC integral has
// been stored yet, a kFATAL-level message is logged before returning.
// NOTE(review): the signature and opening brace (listing line 174) are missing
// from this extract.
175  if(fResults.fROCs.size()==0) Log() << kFATAL << "No cross-validation results available" << Endl;
176  return fResults;
177 }
Config & gConfig()
Definition: Config.cxx:43
std::vector< Double_t > fSigs
virtual void SetLineWidth(Width_t lwidth)
Set the line width.
Definition: TAttLine.h:49
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
float Float_t
Definition: RtypesCore.h:53
void SetTitle(const char *title="")
Set canvas title.
Definition: TCanvas.cxx:1917
return c
const CrossValidationResult & GetResults() const
T GetValue(const TString &key)
Definition: OptionMap.h:152
CrossValidationResult fResults
MsgLogger & Log() const
Definition: Configurable.h:128
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:37
Basic string class.
Definition: TString.h:137
bool Bool_t
Definition: RtypesCore.h:59
std::unique_ptr< Factory > fClassifier
const Bool_t kFALSE
Definition: Rtypes.h:92
virtual void SetTitle(const char *title="")
Set graph title.
Definition: TGraph.cxx:2176
STL namespace.
std::vector< Double_t > fEff10s
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:388
OptionMap fMethod
Definition: Envelope.h:58
DataSet * Data() const
Definition: MethodBase.h:405
void SetNumFolds(UInt_t i)
Base class for all machine learning algorithms.
Definition: Envelope.h:55
std::vector< Double_t > fTrainEff01s
std::vector< Double_t > fTrainEff10s
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:46
std::vector< Double_t > fEff01s
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance (here apparently called resultsName i...
Definition: DataSet.cxx:337
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
Float_t GetROCStandardDeviation() const
const TString & GetMethodName() const
Definition: MethodBase.h:327
std::vector< Double_t > fTrainEff30s
virtual Double_t GetSignificance() const
compute significance of mean difference: significance = |<S> - <B>|/Sqrt(RMS_S^2 + RMS_B^2) ...
TGraphErrors * gr
Definition: legend1.C:25
CrossValidation(DataLoader *loader)
virtual void Evaluate()
Virtual method to be implemented with your algorithm.
The Canvas class.
Definition: TCanvas.h:41
double Double_t
Definition: RtypesCore.h:55
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
std::shared_ptr< DataLoader > fDataLoader
Definition: Envelope.h:59
TMultiGraph * GetROCCurves(Bool_t fLegend=kTRUE)
std::vector< Double_t > fEffAreas
virtual void Draw(Option_t *option="")
Draw a canvas.
Definition: TCanvas.cxx:795
std::vector< Double_t > fSeps
Abstract ClassifierFactory template that handles arbitrary types.
void SetSilent(Bool_t s)
Definition: Config.h:64
std::map< UInt_t, Float_t > fROCs
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:53
TCanvas * Draw(const TString name="CrossValidation") const
std::vector< Double_t > fEff30s
virtual Double_t GetTrainingEfficiency(const TString &)
virtual TLegend * BuildLegend(Double_t x1=0.5, Double_t y1=0.67, Double_t x2=0.88, Double_t y2=0.88, const char *title="", Option_t *option="")
Build a legend from the graphical objects in the pad.
Definition: TPad.cxx:426
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:433
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
static void EnableOutput()
Definition: MsgLogger.cxx:70
const Bool_t kTRUE
Definition: Rtypes.h:91
virtual void TestClassification()
initialization
std::shared_ptr< TMultiGraph > fROCCurves
char name[80]
Definition: TGX11.cxx:109
const char * Data() const
Definition: TString.h:349