ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
TMVARegression.C
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 /**********************************************************************************
3  * Project : TMVA - a Root-integrated toolkit for multivariate data analysis *
4  * Package : TMVA *
5  * Root Macro: TMVARegression *
6  * *
7  * This macro provides examples for the training and testing of the *
8  * TMVA regression methods. *
9  * *
10  * As input data is used a toy-MC sample consisting of four Gaussian-distributed *
11  * and linearly correlated input variables. *
12  * *
13  * The methods to be used can be switched on and off by means of booleans, or *
14  * via the prompt command, for example: *
15  * *
16  * root -l TMVARegression.C\(\"LD,MLP\"\) *
17  * *
18  * (note that the backslashes are mandatory) *
19  * If no method given, a default set is used. *
20  * *
21  * The output file "TMVAReg.root" can be analysed with the use of dedicated *
22  * macros (simply say: root -l <macro.C>), which can be conveniently *
23  * invoked through a GUI that will appear at the end of the run of this macro. *
24  **********************************************************************************/
25 
26 #include <cstdlib>
27 #include <iostream>
28 #include <map>
29 #include <string>
30 
31 #include "TChain.h"
32 #include "TFile.h"
33 #include "TTree.h"
34 #include "TString.h"
35 #include "TObjString.h"
36 #include "TSystem.h"
37 #include "TROOT.h"
38 
39 #include "TMVA/Tools.h"
40 #include "TMVA/Factory.h"
41 #include "TMVA/TMVARegGui.h"
42 
43 
44 using namespace TMVA;
45 
46 void TMVARegression( TString myMethodList = "" )
47 {
48  // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
49  // if you use your private .rootrc, or run from a different directory, please copy the
50  // corresponding lines from .rootrc
51 
52  // methods to be processed can be given as an argument; use format:
53  //
54  // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
55  //
56 
57  //---------------------------------------------------------------
58  // This loads the library
60 
61 
62 
63  // Default MVA methods to be trained + tested
64  std::map<std::string,int> Use;
65 
66  // --- Mutidimensional likelihood and Nearest-Neighbour methods
67  Use["PDERS"] = 0;
68  Use["PDEFoam"] = 1;
69  Use["KNN"] = 1;
70  //
71  // --- Linear Discriminant Analysis
72  Use["LD"] = 1;
73  //
74  // --- Function Discriminant analysis
75  Use["FDA_GA"] = 1;
76  Use["FDA_MC"] = 0;
77  Use["FDA_MT"] = 0;
78  Use["FDA_GAMT"] = 0;
79  //
80  // --- Neural Network
81  Use["MLP"] = 1;
82  //
83  // --- Support Vector Machine
84  Use["SVM"] = 0;
85  //
86  // --- Boosted Decision Trees
87  Use["BDT"] = 0;
88  Use["BDTG"] = 1;
89  // ---------------------------------------------------------------
90 
91  std::cout << std::endl;
92  std::cout << "==> Start TMVARegression" << std::endl;
93 
94  // Select methods (don't look at this code - not of interest)
95  if (myMethodList != "") {
96  for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
97 
98  std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
99  for (UInt_t i=0; i<mlist.size(); i++) {
100  std::string regMethod(mlist[i]);
101 
102  if (Use.find(regMethod) == Use.end()) {
103  std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
104  for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
105  std::cout << std::endl;
106  return;
107  }
108  Use[regMethod] = 1;
109  }
110  }
111 
112  // --------------------------------------------------------------------------------------------------
113 
114  // --- Here the preparation phase begins
115 
116  // Create a new root output file
117  TString outfileName( "TMVAReg.root" );
118  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
119 
120  // Create the factory object. Later you can choose the methods
121  // whose performance you'd like to investigate. The factory will
122  // then run the performance analysis for you.
123  //
124  // The first argument is the base of the name of all the
125  // weightfiles in the directory weight/
126  //
127  // The second argument is the output file for the training results
128  // All TMVA output can be suppressed by removing the "!" (not) in
129  // front of the "Silent" argument in the option string
130  TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile,
131  "!V:!Silent:Color:DrawProgressBar" );
132 
133  // If you wish to modify default settings
134  // (please check "src/Config.h" to see all available global options)
135  // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
136  // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
137 
138  // Define the input variables that shall be used for the MVA training
139  // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
140  // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
141  factory->AddVariable( "var1", "Variable 1", "units", 'F' );
142  factory->AddVariable( "var2", "Variable 2", "units", 'F' );
143 
144  // You can add so-called "Spectator variables", which are not used in the MVA training,
145  // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
146  // input variables, the response values of all trained MVAs, and the spectator variables
147  factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' );
148  factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' );
149 
150  // Add the variable carrying the regression target
151  factory->AddTarget( "fvalue" );
152 
153  // It is also possible to declare additional targets for multi-dimensional regression, ie:
154  // -- factory->AddTarget( "fvalue2" );
155  // BUT: this is currently ONLY implemented for MLP
156 
157  // Read training and test data (see TMVAClassification for reading ASCII files)
158  // load the signal and background event samples from ROOT trees
159  TFile *input(0);
160  TString fname = "./tmva_reg_example.root";
161  if (!gSystem->AccessPathName( fname ))
162  input = TFile::Open( fname ); // check if file in local directory exists
163  else
164  input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
165 
166  if (!input) {
167  std::cout << "ERROR: could not open data file" << std::endl;
168  exit(1);
169  }
170  std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl;
171 
172  // --- Register the regression tree
173 
174  TTree *regTree = (TTree*)input->Get("TreeR");
175 
176  // global event weights per tree (see below for setting event-wise weights)
177  Double_t regWeight = 1.0;
178 
179  // You can add an arbitrary number of regression trees
180  factory->AddRegressionTree( regTree, regWeight );
181 
182  // This would set individual event weights (the variables defined in the
183  // expression need to exist in the original TTree)
184  factory->SetWeightExpression( "var1", "Regression" );
185 
186  // Apply additional cuts on the signal and background samples (can be different)
187  TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
188 
189  // tell the factory to use all remaining events in the trees after training for testing:
190  factory->PrepareTrainingAndTestTree( mycut,
191  "nTrain_Regression=1000:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );
192  // factory->PrepareTrainingAndTestTree( mycut,
193  // "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );
194 
195  // If no numbers of events are given, half of the events in the tree are used
196  // for training, and the other half for testing:
197  // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
198 
199  // ---- Book MVA methods
200  //
201  // please lookup the various method configuration options in the corresponding cxx files, eg:
202  // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
203  // it is possible to preset ranges in the option string in which the cut optimisation should be done:
204  // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
205 
206  // PDE - RS method
207  if (Use["PDERS"])
208  factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
209  "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
210  // And the options strings for the MinMax and RMS methods, respectively:
211  // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
212  // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
213 
214  if (Use["PDEFoam"])
215  factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
216  "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );
217 
218  // K-Nearest Neighbour classifier (KNN)
219  if (Use["KNN"])
220  factory->BookMethod( TMVA::Types::kKNN, "KNN",
221  "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
222 
223  // Linear discriminant
224  if (Use["LD"])
225  factory->BookMethod( TMVA::Types::kLD, "LD",
226  "!H:!V:VarTransform=None" );
227 
228  // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
229  if (Use["FDA_MC"])
230  factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
231  "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
232 
233  if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
234  factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
235  "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );
236 
237  if (Use["FDA_MT"])
238  factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
239  "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
240 
241  if (Use["FDA_GAMT"])
242  factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
243  "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
244 
245  // Neural network (MLP)
246  if (Use["MLP"])
247  factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
248 
249  // Support Vector Machine
250  if (Use["SVM"])
251  factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
252 
253  // Boosted Decision Trees
254  if (Use["BDT"])
255  factory->BookMethod( TMVA::Types::kBDT, "BDT",
256  "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
257 
258  if (Use["BDTG"])
259  factory->BookMethod( TMVA::Types::kBDT, "BDTG",
260  "!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:MaxDepth=4" );
261  // --------------------------------------------------------------------------------------------------
262 
263  // ---- Now you can tell the factory to train, test, and evaluate the MVAs
264 
265  // Train MVAs using the set of training events
266  factory->TrainAllMethods();
267 
268  // ---- Evaluate all MVAs using the set of test events
269  factory->TestAllMethods();
270 
271  // ----- Evaluate and compare performance of all configured MVAs
272  factory->EvaluateAllMethods();
273 
274  // --------------------------------------------------------------
275 
276  // Save the output
277  outputFile->Close();
278 
279  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
280  std::cout << "==> TMVARegression is done!" << std::endl;
281 
282  delete factory;
283 
284  // Launch the GUI for the root macros
285  if (!gROOT->IsBatch()) TMVA::TMVARegGui( outfileName );
286 }
287 
288 int main( int argc, char** argv )
289 {
290  // Select methods (don't look at this code - not of interest)
291  TString methodList;
292  for (int i=1; i<argc; i++) {
293  TString regMethod(argv[i]);
294  if(regMethod=="-b" || regMethod=="--batch") continue;
295  if (!methodList.IsNull()) methodList += TString(",");
296  methodList += regMethod;
297  }
298  TMVARegression(methodList);
299  return 0;
300 }
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1213
static Tools & Instance()
Definition: Tools.cxx:80
void AddRegressionTree(TTree *tree, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Definition: Factory.h:132
#define gROOT
Definition: TROOT.h:344
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator variable in data set info
Definition: Factory.cxx:570
Basic string class.
Definition: TString.h:137
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: Factory.cxx:540
void TrainAllMethods()
iterates through all booked methods and calls training
Definition: Factory.cxx:965
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: Factory.cxx:558
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3851
Tools & gTools()
Definition: Tools.cxx:79
void TMVARegression(TString myMethodList="")
A specialized string object used for TTree selections.
Definition: TCut.h:27
int main(int argc, char **argv)
R__EXTERN TSystem * gSystem
Definition: TSystem.h:545
MethodBase * BookMethod(TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition: Factory.cxx:706
void EvaluateAllMethods(void)
iterates over all MVAs that have been booked, and calls their evaluation methods
Definition: Factory.cxx:1185
void TestAllMethods()
Definition: Factory.cxx:1085
unsigned int UInt_t
Definition: RtypesCore.h:42
Bool_t IsNull() const
Definition: TString.h:387
double Double_t
Definition: RtypesCore.h:55
void TMVARegGui(const char *fName="TMVAReg.root")
Definition: TMVARegGui.cxx:46
void SetWeightExpression(const TString &variable, const TString &className="")
sets the event-weight expression for the given class name
Definition: Factory.cxx:610
A TTree object has a header with a name and a title.
Definition: TTree.h:98
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings ...
Definition: Tools.cxx:1207
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: Factory.cxx:679