Logo ROOT   6.14/05
Reference Guide
TMVARegressionApplication.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_tmva
3 /// \notebook -nodraw
4 /// This macro provides a simple example on how to use the trained regression MVAs
5 /// within an analysis module
6 ///
7 /// - Project : TMVA - a Root-integrated toolkit for multivariate data analysis
8 /// - Package : TMVA
9 /// - Executable: TMVARegressionApplication
10 ///
11 /// \macro_output
12 /// \macro_code
13 /// \author Andreas Hoecker
14 
15 #include <cstdlib>
16 #include <vector>
17 #include <iostream>
18 #include <map>
19 #include <string>
20 
21 #include "TFile.h"
22 #include "TTree.h"
23 #include "TString.h"
24 #include "TSystem.h"
25 #include "TROOT.h"
26 #include "TStopwatch.h"
27 
28 #include "TMVA/Tools.h"
29 #include "TMVA/Reader.h"
30 
31 using namespace TMVA;
32 
33 void TMVARegressionApplication( TString myMethodList = "" )
34 {
35  //---------------------------------------------------------------
36  // This loads the library
38 
39  // Default MVA methods to be trained + tested
40  std::map<std::string,int> Use;
41 
42  // --- Mutidimensional likelihood and Nearest-Neighbour methods
43  Use["PDERS"] = 0;
44  Use["PDEFoam"] = 1;
45  Use["KNN"] = 1;
46  //
47  // --- Linear Discriminant Analysis
48  Use["LD"] = 1;
49  //
50  // --- Function Discriminant analysis
51  Use["FDA_GA"] = 0;
52  Use["FDA_MC"] = 0;
53  Use["FDA_MT"] = 0;
54  Use["FDA_GAMT"] = 0;
55  //
56  // --- Neural Network
57  Use["MLP"] = 0;
58 #ifdef R__HAS_TMVACPU
59  Use["DNN_CPU"] = 1;
60 #else
61  Use["DNN_CPU"] = 0;
62 #endif
63 
64  //
65  // --- Support Vector Machine
66  Use["SVM"] = 0;
67  //
68  // --- Boosted Decision Trees
69  Use["BDT"] = 0;
70  Use["BDTG"] = 1;
71  // ---------------------------------------------------------------
72 
73  std::cout << std::endl;
74  std::cout << "==> Start TMVARegressionApplication" << std::endl;
75 
76  // Select methods (don't look at this code - not of interest)
77  if (myMethodList != "") {
78  for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
79 
80  std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
81  for (UInt_t i=0; i<mlist.size(); i++) {
82  std::string regMethod(mlist[i]);
83 
84  if (Use.find(regMethod) == Use.end()) {
85  std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
86  for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
87  std::cout << std::endl;
88  return;
89  }
90  Use[regMethod] = 1;
91  }
92  }
93 
94  // --------------------------------------------------------------------------------------------------
95 
96  // --- Create the Reader object
97 
98  TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );
99 
100  // Create a set of variables and declare them to the reader
101  // - the variable names MUST corresponds in name and type to those given in the weight file(s) used
102  Float_t var1, var2;
103  reader->AddVariable( "var1", &var1 );
104  reader->AddVariable( "var2", &var2 );
105 
106  // Spectator variables declared in the training have to be added to the reader, too
107  Float_t spec1,spec2;
108  reader->AddSpectator( "spec1:=var1*2", &spec1 );
109  reader->AddSpectator( "spec2:=var1*3", &spec2 );
110 
111  // --- Book the MVA methods
112 
113  TString dir = "dataset/weights/";
114  TString prefix = "TMVARegression";
115 
116  // Book method(s)
117  for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) {
118  if (it->second) {
119  TString methodName = it->first + " method";
120  TString weightfile = dir + prefix + "_" + TString(it->first) + ".weights.xml";
121  reader->BookMVA( methodName, weightfile );
122  }
123  }
124 
125  // Book output histograms
126  TH1* hists[100];
127  Int_t nhists = -1;
128  for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) {
129  TH1* h = new TH1F( it->first.c_str(), TString(it->first) + " method", 100, -100, 600 );
130  if (it->second) hists[++nhists] = h;
131  }
132  nhists++;
133 
134  // Prepare input tree (this must be replaced by your data source)
135  // in this example, there is a toy tree with signal and one with background events
136  // we'll later on use only the "signal" events for the test in this example.
137  //
138  TFile *input(0);
139  TString fname = "./tmva_reg_example.root";
140  if (!gSystem->AccessPathName( fname )) {
141  input = TFile::Open( fname ); // check if file in local directory exists
142  }
143  else {
145  input = TFile::Open("http://root.cern.ch/files/tmva_reg_example.root", "CACHEREAD"); // if not: download from ROOT server
146  }
147  if (!input) {
148  std::cout << "ERROR: could not open data file" << std::endl;
149  exit(1);
150  }
151  std::cout << "--- TMVARegressionApp : Using input file: " << input->GetName() << std::endl;
152 
153  // --- Event loop
154 
155  // Prepare the tree
156  // - here the variable names have to corresponds to your tree
157  // - you can use the same variables as above which is slightly faster,
158  // but of course you can use different ones and copy the values inside the event loop
159  //
160  TTree* theTree = (TTree*)input->Get("TreeR");
161  std::cout << "--- Select signal sample" << std::endl;
162  theTree->SetBranchAddress( "var1", &var1 );
163  theTree->SetBranchAddress( "var2", &var2 );
164 
165  std::cout << "--- Processing: " << theTree->GetEntries() << " events" << std::endl;
166  TStopwatch sw;
167  sw.Start();
168  for (Long64_t ievt=0; ievt<theTree->GetEntries();ievt++) {
169 
170  if (ievt%1000 == 0) {
171  std::cout << "--- ... Processing event: " << ievt << std::endl;
172  }
173 
174  theTree->GetEntry(ievt);
175 
176  // Retrieve the MVA target values (regression outputs) and fill into histograms
177  // NOTE: EvaluateRegression(..) returns a vector for multi-target regression
178 
179  for (Int_t ih=0; ih<nhists; ih++) {
180  TString title = hists[ih]->GetTitle();
181  Float_t val = (reader->EvaluateRegression( title ))[0];
182  hists[ih]->Fill( val );
183  }
184  }
185  sw.Stop();
186  std::cout << "--- End of event loop: "; sw.Print();
187 
188  // --- Write histograms
189 
190  TFile *target = new TFile( "TMVARegApp.root","RECREATE" );
191  for (Int_t ih=0; ih<nhists; ih++) hists[ih]->Write();
192  target->Close();
193 
194  std::cout << "--- Created root file: \"" << target->GetName()
195  << "\" containing the MVA output histograms" << std::endl;
196 
197  delete reader;
198 
199  std::cout << "==> TMVARegressionApplication is done!" << std::endl << std::endl;
200 }
201 
202 int main( int argc, char** argv )
203 {
204  // Select methods (don't look at this code - not of interest)
205  TString methodList;
206  for (int i=1; i<argc; i++) {
207  TString regMethod(argv[i]);
208  if(regMethod=="-b" || regMethod=="--batch") continue;
209  if (!methodList.IsNull()) methodList += TString(",");
210  methodList += regMethod;
211  }
212  TMVARegressionApplication(methodList);
213  return 0;
214 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1276
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3251
static Tools & Instance()
Definition: Tools.cxx:75
long long Long64_t
Definition: RtypesCore.h:69
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Definition: TStopwatch.cxx:58
void Print(Option_t *option="") const
Print the real and cpu time passed between the start and stop events.
Definition: TStopwatch.cxx:219
float Float_t
Definition: RtypesCore.h:53
void AddVariable(const TString &expression, Float_t *)
Add a float variable or expression to the reader.
Definition: Reader.cxx:308
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:285
const std::vector< Float_t > & EvaluateRegression(const TString &methodTag, Double_t aux=0)
evaluates MVA for given set of input variables
Definition: Reader.cxx:582
static Bool_t SetCacheFileDir(ROOT::Internal::TStringView cacheDir, Bool_t operateDisconnected=kTRUE, Bool_t forceCacheread=kFALSE)
Definition: TFile.h:315
int Int_t
Definition: RtypesCore.h:41
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3976
void Stop()
Stop the stopwatch.
Definition: TStopwatch.cxx:77
int main(int argc, char **argv)
IMethod * BookMVA(const TString &methodTag, const TString &weightfile)
read method name from weight file
Definition: Reader.cxx:377
R__EXTERN TSystem * gSystem
Definition: TSystem.h:540
unsigned int UInt_t
Definition: RtypesCore.h:42
Tools & gTools()
#define h(i)
Definition: RSha256.hxx:106
void AddSpectator(const TString &expression, Float_t *)
Add a float spectator or expression to the reader.
Definition: Reader.cxx:326
The TH1 histogram class.
Definition: TH1.h:56
The Reader class serves to use the MVAs in a specific analysis context.
Definition: Reader.h:63
Abstract ClassifierFactory template that handles arbitrary types.
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings ...
Definition: Tools.cxx:1211
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:48
Stopwatch class.
Definition: TStopwatch.h:28