Logo ROOT   6.10/09
Reference Guide
regression_averagedevs.cxx
Go to the documentation of this file.
1 #include <limits>
2 
4 
5 #include "TLatex.h"
6 #include "TGraphErrors.h"
7 #include "TFrame.h"
8 
9 /*
10  this macro plots the quadratic deviation of the estimated value from the target value, averaged over the first Nevt events in the test sample (all if Nevt=-1)
11  a) normal average
12  b) truncated average, using best 90%
13  created January 2009, Eckhard von Toerne, University of Bonn, Germany
14 */
15 
16 void TMVA::regression_averagedevs(TString dataset,TString fin, Int_t Nevt, Bool_t useTMVAStyle )
17 {
18  bool debug=false;
19  if (Nevt <0) Nevt=1000000;
20  TMVAGlob::Initialize( useTMVAStyle );
21  // checks if file with name "fin" is already open, and if not opens one
22  TFile* file = TMVAGlob::OpenFile( fin );
23  TList jobDirList;
24  TMVAGlob::GetListOfJobs((TFile*)file->GetDirectory(dataset.Data()),jobDirList);
25  if (jobDirList.GetSize()==0) {
26  cout << "error could not find jobs" << endl;
27  return;
28  }
29 
30  Bool_t __PLOT_LOGO__ = kTRUE;
31  Bool_t __SAVE_IMAGE__ = kTRUE;
32 
33  TDirectory* dir0 = (TDirectory*) (jobDirList.At(0));
34  //TDirectory* dir0 = (TDirectory*) (file->Get("InputVariables_Id"));
35  Int_t nTargets = TMVAGlob::GetNumberOfTargets( dir0);
36 
37  if (debug) cout << "found targets " << nTargets<<endl;
38  TCanvas* c=0;
39  for (Int_t itrgt = 0 ; itrgt < nTargets; itrgt++){
40  if (debug) cout << "loop targets " << itrgt<<endl;
41  TString xtit = "Method";
42  TString ytit = "Average Quadratic Deviation";
43  TString ftit = ytit + " versus " + xtit + Form(" for target %d",itrgt);
44  c = new TCanvas( Form("c%d",itrgt), ftit , 50+20*itrgt, 10*itrgt, 750, 650 );
45 
46  // global style settings
47  c->SetGrid();
48  c->SetTickx(1);
49  c->SetTicky(0);
50  c->SetTopMargin(0.28);
51  c->SetBottomMargin(0.1);
52 
53  TString hNameRef(Form("regression_average_devs_target%d",itrgt));
54 
55  const Int_t maxMethods = 100;
56  // const Int_t maxTargets = 100;
57  Float_t m[4][maxMethods]; // h0 train-all, h1 train-90%, h2 test-all, h3 test-90%
58  Float_t em[4][maxMethods];
59  Float_t x[4][maxMethods];
60  Float_t ex[4][maxMethods];
61 
62  TIter next(&jobDirList);
63  Float_t mymax=0., mymin=std::numeric_limits<float>::max();
64  TString mvaNames[maxMethods];
65  TDirectory *jobDir;
66  Int_t nMethods = 0;
67  // loop over all methods
68  while ( (jobDir = (TDirectory*)next()) ) {
69  TString methodTitle;
70  TMVAGlob::GetMethodTitle(methodTitle,jobDir);
71  mvaNames[nMethods]=methodTitle;
72  if (debug) cout << "--- Found directory for method: " << methodTitle << endl;
73  TIter keyIt(jobDir->GetListOfKeys());
74  TKey *histKey;
75  while ( (histKey = (TKey*)keyIt()) ) {
76  if (histKey->ReadObj()->InheritsFrom("TH1F") ){
77  TString s(histKey->ReadObj()->GetName());
78  if( !s.Contains("Quadr_Dev") ) continue;
79  if( !s.Contains(Form("target_%d_",itrgt))) continue;
80  Int_t ihist = 0 ;
81  if( !s.Contains("best90perc") && s.Contains("train")) ihist=0;
82  if( s.Contains("best90perc") && s.Contains("train")) ihist=1;
83  if( !s.Contains("best90perc") && s.Contains("test")) ihist=2;
84  if( s.Contains("best90perc") && s.Contains("test")) ihist=3;
85  if (debug) cout <<"using histogram" << s << ", ihist="<<ihist<<endl;
86  TH1F* h = (TH1F*) (histKey->ReadObj());
87  m[ihist][nMethods] = sqrt(h->GetMean());
88  em[ihist][nMethods] = h->GetRMS()/(sqrt(h->GetEntries())*2.*h->GetMean());
89  x[ihist][nMethods] = nMethods+0.44+0.12*ihist;
90  ex[ihist][nMethods] = 0.001;
91  mymax= m[ihist][nMethods] > mymax ? m[ihist][nMethods] : mymax;
92  mymin= m[ihist][nMethods] < mymin ? m[ihist][nMethods] : mymin;
93  if (debug) cout << "m"<< ihist << "="<<m[ihist][nMethods]<<endl;
94  }
95  }
96  nMethods++;
97  }
98  TH1F* haveragedevs= new TH1F(Form("haveragedevs%d",itrgt),ftit,nMethods,0.,nMethods);
99  for (int i=0;i<nMethods;i++) haveragedevs->GetXaxis()->SetBinLabel(i+1, mvaNames[i]);
100  haveragedevs->SetStats(0);
101  TGraphErrors* graphTrainAv= new TGraphErrors(nMethods,x[0],m[0],ex[0],em[0]);
102  TGraphErrors* graphTruncTrainAv= new TGraphErrors(nMethods,x[1],m[1],ex[1],em[1]);
103  TGraphErrors* graphTestAv= new TGraphErrors(nMethods,x[2],m[2],ex[2],em[2]);
104  TGraphErrors* graphTruncTestAv= new TGraphErrors(nMethods,x[3],m[3],ex[3],em[3]);
105 
106  Double_t xmax = 1.2 * mymax;
107  Double_t xmin = 0.8 * mymin - (mymax - mymin)*0.05;
108  Double_t xheader = 0.2;
109  Double_t yheader = xmax*0.92;
110  xmin = xmin > 0.? xmin : 0.;
111  if (mymin > 1.e-20 && log10(mymax/mymin)>1.5){
112  c->SetLogy();
113  cout << "--- result differ significantly using log scale for display of regression results"<< endl;
114  xmax = 1.5 * xmax;
115  xmin = 0.75 * mymin;
116  yheader = xmax*0.78;
117  }
118  Float_t x0L = 0.03, y0H = 0.91;
119  Float_t dxL = 0.457-x0L, dyH = 0.14;
120  // TLegend *legend = new TLegend( x0L, y0H-dyH, x0L+dxL, y0H , "Average Deviation = (#sum_{evts} (f_{MVA} - f_{target})^{2} )^{1/2}");
121  TLegend *legend = new TLegend( x0L, y0H-dyH, x0L+dxL, y0H );
122  legend->SetTextSize( 0.035 );
123  legend->SetTextAlign(12);
124  legend->SetMargin( 0.1 );
125 
126  TH1F *hr = c->DrawFrame(-1.,0.,nMethods+1, xmax);
127  cout << endl;
128  cout << "Training: Average Deviation between target " << itrgt <<" and estimate" << endl;
129  cout << Form("%-15s%-15s%-15s", "Method","Average Dev.","trunc. Aver.(90%)") <<endl;
130  for (int i=0;i<nMethods;i++){
131  cout << Form("%-15s:%#10.3g%#10.3g",
132  (const char*)mvaNames[i], m[0][i],m[1][i])<<endl;
133  // cout << mvaNames[i] << " " << m[0][i]<< " "<< m[1][i]<<endl;
134  hr->GetXaxis()->SetBinLabel(i+1," ");
135  }
136  cout << endl;
137  cout << "Testing: Average Deviation between target " << itrgt <<" and estimate" << endl;
138  cout << Form("%-15s%-15s%-15s", "Method","Average Dev.","trunc. Aver.(90%)") <<endl;
139  for (int i=0;i<nMethods;i++){
140  cout << Form("%-15s:%#10.3g%#10.3g",
141  (const char*)mvaNames[i], m[2][i],m[3][i])<<endl;
142  //cout << mvaNames[i] << " " << m[2][i]<< " "<< m[3][i]<<endl;
143  }
144 
145  haveragedevs->SetMinimum(xmin);
146  haveragedevs->SetMaximum(xmax);
147  haveragedevs->SetXTitle("Method");
148  haveragedevs->SetYTitle("Deviation from target");
149  haveragedevs->Draw();
150  c->GetFrame()->SetFillColor(21);
151  c->GetFrame()->SetBorderSize(12);
152  graphTrainAv->SetMarkerSize(1.);
153  graphTrainAv->SetMarkerColor(kBlue);
154  graphTrainAv->SetMarkerStyle(25);
155  graphTrainAv->Draw("P");
156 
157  graphTruncTrainAv->SetMarkerSize(1.);
158  graphTruncTrainAv->SetMarkerColor(kBlack);
159  graphTruncTrainAv->SetMarkerStyle(25);
160  graphTruncTrainAv->Draw("P");
161 
162  graphTestAv->SetMarkerSize(1.);
163  graphTestAv->SetMarkerColor(kBlue);
164  graphTestAv->SetMarkerStyle(21);
165  graphTestAv->Draw("P");
166 
167  graphTruncTestAv->SetMarkerSize(1.);
168  graphTruncTestAv->SetMarkerColor(kBlack);
169  graphTruncTestAv->SetMarkerStyle(21);
170  graphTruncTestAv->Draw("P");
171  legend->AddEntry(graphTrainAv,TString("Training Sample, Average Deviation"),"p");
172  legend->AddEntry(graphTruncTrainAv,TString("Training Sample, truncated Average Dev. (best 90%)"),"p");
173  legend->AddEntry(graphTestAv,TString("Test Sample, Average Deviation"),"p");
174  legend->AddEntry(graphTruncTestAv,TString("Test Sample, truncated Average Dev. (best 90%)"),"p");
175 
176  legend->Draw();
177  TLatex legHeader;
178  legHeader.SetTextSize(0.035);
179  legHeader.SetTextAlign(12);
180  //legHeader.DrawLatex(x0L, y0H+0.01, "Average Deviation = (#sum (_{ } f_{MVA} - f_{target})^{2} )^{1/2}");
181  legHeader.DrawLatex(xheader, yheader, "Average Deviation = (#sum (_{ } f_{MVA} - f_{target})^{2} )^{1/2}");
182  // ============================================================
183 
184  if (__PLOT_LOGO__) TMVAGlob::plot_logo();
185  // ============================================================
186 
187  c->Update();
188  TString fname = dataset+"/plots/" + hNameRef;
189  if (__SAVE_IMAGE__) TMVAGlob::imgconv( c, fname );
190  } // end loop itrgt
191  return;
192 }
193 
virtual void SetTickx(Int_t value=1)
Definition: TPad.h:344
void imgconv(TCanvas *c, const TString &fname)
Definition: tmvaglob.cxx:212
float xmin
Definition: THbookFile.cxx:93
virtual TList * GetListOfKeys() const
Definition: TDirectory.h:148
virtual void SetMaximum(Double_t maximum=-1111)
Definition: TH1.h:375
This class displays a legend box (TPaveText) containing several legend entries.
Definition: TLegend.h:23
float Float_t
Definition: RtypesCore.h:53
TFile * OpenFile(const TString &fin)
Definition: tmvaglob.cxx:192
virtual void Draw(Option_t *option="")
Draw this legend with its current attributes.
Definition: TLegend.cxx:452
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:311
Definition: Rtypes.h:55
TH1 * h
Definition: legend2.C:5
TH1F * DrawFrame(Double_t xmin, Double_t ymin, Double_t xmax, Double_t ymax, const char *title="")
Draw an empty pad frame with X and Y axis.
Definition: TPad.cxx:1537
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:376
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6763
Basic string class.
Definition: TString.h:129
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:551
int Int_t
Definition: RtypesCore.h:41
virtual void SetYTitle(const char *title)
Definition: TH1.h:390
bool Bool_t
Definition: RtypesCore.h:59
void SetMargin(Float_t margin)
Definition: TLegend.h:69
Double_t GetRMS(Int_t axis=1) const
Definition: TH1.h:296
virtual void Draw(Option_t *chopt="")
Draw this graph with its current attributes.
Definition: TGraph.cxx:745
TLegend * legend
Definition: pirndm.C:35
virtual void SetTopMargin(Float_t topmargin)
Set Pad top margin in fraction of the pad height.
Definition: TAttPad.cxx:130
double sqrt(double)
Double_t x[n]
Definition: legend1.C:17
TFrame * GetFrame()
Get frame.
Definition: TPad.cxx:2794
you should not use this method at all Int_t Int_t Double_t Double_t em
Definition: TRolke.cxx:630
Int_t GetNumberOfTargets(TDirectory *dir)
Definition: tmvaglob.cxx:396
double log10(double)
virtual void SetGrid(Int_t valuex=1, Int_t valuey=1)
Definition: TPad.h:323
To draw Mathematical Formula.
Definition: TLatex.h:18
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Definition: TAttMarker.h:38
void GetMethodTitle(TString &name, TKey *ikey)
Definition: tmvaglob.cxx:341
TLatex * DrawLatex(Double_t x, Double_t y, const char *text)
Make a copy of this object with the new parameters And copy object attributes.
Definition: TLatex.cxx:1918
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
virtual void SetBorderSize(Short_t bordersize)
Definition: TWbox.h:50
virtual void SetTextAlign(Short_t align=11)
Set the text alignment.
Definition: TAttText.h:41
A doubly linked list.
Definition: TList.h:43
void regression_averagedevs(TString dataset, TString fin, Int_t Nevt=-1, Bool_t useTMVAStyle=kTRUE)
virtual void SetBottomMargin(Float_t bottommargin)
Set Pad bottom margin in fraction of the pad height.
Definition: TAttPad.cxx:100
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2851
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition: TAttFill.h:37
virtual void SetTicky(Int_t value=1)
Definition: TPad.h:345
UInt_t GetListOfJobs(TFile *file, TList &jobdirs)
Definition: tmvaglob.cxx:607
TMarker * m
Definition: textangle.C:8
char * Form(const char *fmt,...)
virtual void SetMarkerStyle(Style_t mstyle=1)
Set the marker style.
Definition: TAttMarker.h:40
float xmax
Definition: THbookFile.cxx:93
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:315
void plot_logo(Float_t v_scale=1.0, Float_t skew=1.0)
Definition: tmvaglob.cxx:263
virtual TDirectory * GetDirectory(const char *apath, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory named "apath".
virtual void SetMarkerSize(Size_t msize=1)
Set the marker size.
Definition: TAttMarker.h:41
The Canvas class.
Definition: TCanvas.h:31
double Double_t
Definition: RtypesCore.h:55
TLegendEntry * AddEntry(const TObject *obj, const char *label="", Option_t *option="lpf")
Add a new entry to this legend.
Definition: TLegend.cxx:359
Describe directory structure in memory.
Definition: TDirectory.h:34
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
virtual Double_t GetEntries() const
Return the current number of entries.
Definition: TH1.cxx:4054
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition: TAxis.cxx:809
virtual void SetXTitle(const char *title)
Definition: TH1.h:389
Definition: file.py:1
A TGraphErrors is a TGraph with error bars.
Definition: TGraphErrors.h:26
bool debug
Definition: Rtypes.h:56
virtual void SetTextSize(Float_t tsize=1)
Set the text size.
Definition: TAttText.h:46
virtual void Update()
Update canvas pad buffers.
Definition: TCanvas.cxx:2208
virtual Int_t GetSize() const
Definition: TCollection.h:89
const Bool_t kTRUE
Definition: RtypesCore.h:91
Double_t ex[n]
Definition: legend1.C:17
virtual void SetStats(Bool_t stats=kTRUE)
Set statistics option on/off.
Definition: TH1.cxx:8103
TAxis * GetXaxis()
Definition: TH1.h:300
const char * Data() const
Definition: TString.h:347
virtual void SetLogy(Int_t value=1)
Set Lin/Log scale for Y.
Definition: TPad.cxx:5780