Logo ROOT   6.07/09
Reference Guide
TMVAClassification.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_tmva
3 /// \notebook -nodraw
4 /// This macro provides examples for the training and testing of the
5 /// TMVA classifiers.
6 ///
7 /// As input data is used a toy-MC sample consisting of four Gaussian-distributed
8 /// and linearly correlated input variables.
9 /// The methods to be used can be switched on and off by means of booleans, or
10 /// via the prompt command, for example:
11 ///
12 /// root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\)
13 ///
14 /// (note that the backslashes are mandatory)
15 /// If no method given, a default set of classifiers is used.
16 /// The output file "TMVA.root" can be analysed with the use of dedicated
17 /// macros (simply say: root -l <macro.C>), which can be conveniently
18 /// invoked through a GUI that will appear at the end of the run of this macro.
19 /// Launch the GUI via the command:
20 ///
21 /// root -l ./TMVAGui.C
22 ///
23 /// You can also compile and run the example with the following commands
24 ///
25 /// make
26 /// ./TMVAClassification <Methods>
27 ///
28 /// where: `<Methods> = "method1 method2"` are the TMVA classifier names
29 /// example:
30 ///
31 /// ./TMVAClassification Fisher LikelihoodPCA BDT
32 ///
33 /// If no method given, a default set is of classifiers is used
34 ///
35 /// - Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis
36 /// - Package : TMVA
37 /// - Root Macro: TMVAClassification
38 ///
39 /// \macro_output
40 /// \macro_code
41 /// \author Andreas Hoecker
42 
43 
44 #include <cstdlib>
45 #include <iostream>
46 #include <map>
47 #include <string>
48 
49 #include "TChain.h"
50 #include "TFile.h"
51 #include "TTree.h"
52 #include "TString.h"
53 #include "TObjString.h"
54 #include "TSystem.h"
55 #include "TROOT.h"
56 
57 #include "TMVA/Factory.h"
58 #include "TMVA/DataLoader.h"
59 #include "TMVA/Tools.h"
60 #include "TMVA/TMVAGui.h"
61 
62 int TMVAClassification( TString myMethodList = "" )
63 {
64  // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
65  // if you use your private .rootrc, or run from a different directory, please copy the
66  // corresponding lines from .rootrc
67 
68  // Methods to be processed can be given as an argument; use format:
69  //
70  // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
71  //
72  // if you like to use a method via the plugin mechanism, we recommend using
73  //
74  // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
75  //
76  // (an example is given for using the BDT as plugin (see below),
77  // but of course the real application is when you write your own
78  // method based)
79 
80  //---------------------------------------------------------------
81  // This loads the library
83 
84  // Default MVA methods to be trained + tested
85  std::map<std::string,int> Use;
86 
87  // Cut optimisation
88  Use["Cuts"] = 1;
89  Use["CutsD"] = 1;
90  Use["CutsPCA"] = 0;
91  Use["CutsGA"] = 0;
92  Use["CutsSA"] = 0;
93  //
94  // 1-dimensional likelihood ("naive Bayes estimator")
95  Use["Likelihood"] = 1;
96  Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings)
97  Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
98  Use["LikelihoodKDE"] = 0;
99  Use["LikelihoodMIX"] = 0;
100  //
101  // Mutidimensional likelihood and Nearest-Neighbour methods
102  Use["PDERS"] = 1;
103  Use["PDERSD"] = 0;
104  Use["PDERSPCA"] = 0;
105  Use["PDEFoam"] = 1;
106  Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting
107  Use["KNN"] = 1; // k-nearest neighbour method
108  //
109  // Linear Discriminant Analysis
110  Use["LD"] = 1; // Linear Discriminant identical to Fisher
111  Use["Fisher"] = 0;
112  Use["FisherG"] = 0;
113  Use["BoostedFisher"] = 0; // uses generalised MVA method boosting
114  Use["HMatrix"] = 0;
115  //
116  // Function Discriminant analysis
117  Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetics Algorithm
118  Use["FDA_SA"] = 0;
119  Use["FDA_MC"] = 0;
120  Use["FDA_MT"] = 0;
121  Use["FDA_GAMT"] = 0;
122  Use["FDA_MCMT"] = 0;
123  //
124  // Neural Networks (all are feed-forward Multilayer Perceptrons)
125  Use["MLP"] = 0; // Recommended ANN
126  Use["MLPBFGS"] = 0; // Recommended ANN with optional training method
127  Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator
128  Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH
129  Use["TMlpANN"] = 0; // ROOT's own ANN
130  Use["DNN"] = 0; // improved implementation of a NN
131  //
132  // Support Vector Machine
133  Use["SVM"] = 1;
134  //
135  // Boosted Decision Trees
136  Use["BDT"] = 1; // uses Adaptive Boost
137  Use["BDTG"] = 0; // uses Gradient Boost
138  Use["BDTB"] = 0; // uses Bagging
139  Use["BDTD"] = 0; // decorrelation + Adaptive Boost
140  Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting
141  //
142  // Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
143  Use["RuleFit"] = 1;
144  // ---------------------------------------------------------------
145 
146  std::cout << std::endl;
147  std::cout << "==> Start TMVAClassification" << std::endl;
148 
149  // Select methods (don't look at this code - not of interest)
150  if (myMethodList != "") {
151  for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
152 
153  std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
154  for (UInt_t i=0; i<mlist.size(); i++) {
155  std::string regMethod(mlist[i]);
156 
157  if (Use.find(regMethod) == Use.end()) {
158  std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
159  for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
160  std::cout << std::endl;
161  return 1;
162  }
163  Use[regMethod] = 1;
164  }
165  }
166 
167  // --------------------------------------------------------------------------------------------------
168 
169  // Here the preparation phase begins
170 
171  // Read training and test data
172  // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
173  TString fname = "./tmva_class_example.root";
174 
175  if (gSystem->AccessPathName( fname )) // file does not exist in local directory
176  gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root");
177 
178  TFile *input = TFile::Open( fname );
179 
180  std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl;
181 
182  // Register the training and test trees
183 
184  TTree *signalTree = (TTree*)input->Get("TreeS");
185  TTree *background = (TTree*)input->Get("TreeB");
186 
187  // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
188  TString outfileName( "TMVA.root" );
189  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
190 
191  // Create the factory object. Later you can choose the methods
192  // whose performance you'd like to investigate. The factory is
193  // the only TMVA object you have to interact with
194  //
195  // The first argument is the base of the name of all the
196  // weightfiles in the directory weight/
197  //
198  // The second argument is the output file for the training results
199  // All TMVA output can be suppressed by removing the "!" (not) in
200  // front of the "Silent" argument in the option string
201  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
202  "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
203 
204  TMVA::DataLoader *dataloader=new TMVA::DataLoader("dataset");
205  // If you wish to modify default settings
206  // (please check "src/Config.h" to see all available global options)
207  //
208  // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
209  // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
210 
211  // Define the input variables that shall be used for the MVA training
212  // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
213  // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
214  dataloader->AddVariable( "myvar1 := var1+var2", 'F' );
215  dataloader->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
216  dataloader->AddVariable( "var3", "Variable 3", "units", 'F' );
217  dataloader->AddVariable( "var4", "Variable 4", "units", 'F' );
218 
219  // You can add so-called "Spectator variables", which are not used in the MVA training,
220  // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
221  // input variables, the response values of all trained MVAs, and the spectator variables
222 
223  dataloader->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );
224  dataloader->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' );
225 
226 
227  // global event weights per tree (see below for setting event-wise weights)
228  Double_t signalWeight = 1.0;
229  Double_t backgroundWeight = 1.0;
230 
231  // You can add an arbitrary number of signal or background trees
232  dataloader->AddSignalTree ( signalTree, signalWeight );
233  dataloader->AddBackgroundTree( background, backgroundWeight );
234 
235  // To give different trees for training and testing, do as follows:
236  //
237  // dataloader->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
238  // dataloader->AddSignalTree( signalTestTree, signalTestWeight, "Test" );
239 
240  // Use the following code instead of the above two or four lines to add signal and background
241  // training and test events "by hand"
242  // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
243  // variable definition, but simply compute the expression before adding the event
244  // ```cpp
245  // // --- begin ----------------------------------------------------------
246  // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
247  // Float_t treevars[4], weight;
248  //
249  // // Signal
250  // for (UInt_t ivar=0; ivar<4; ivar++) signalTree->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
251  // for (UInt_t i=0; i<signalTree->GetEntries(); i++) {
252  // signalTree->GetEntry(i);
253  // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
254  // // add training and test events; here: first half is training, second is testing
255  // // note that the weight can also be event-wise
256  // if (i < signalTree->GetEntries()/2.0) dataloader->AddSignalTrainingEvent( vars, signalWeight );
257  // else dataloader->AddSignalTestEvent ( vars, signalWeight );
258  // }
259  //
260  // // Background (has event weights)
261  // background->SetBranchAddress( "weight", &weight );
262  // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
263  // for (UInt_t i=0; i<background->GetEntries(); i++) {
264  // background->GetEntry(i);
265  // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
266  // // add training and test events; here: first half is training, second is testing
267  // // note that the weight can also be event-wise
268  // if (i < background->GetEntries()/2) dataloader->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
269  // else dataloader->AddBackgroundTestEvent ( vars, backgroundWeight*weight );
270  // }
271  // // --- end ------------------------------------------------------------
272  // ```
273  // End of tree registration
274 
275  // Set individual event weights (the variables must exist in the original TTree)
276  // - for signal : `dataloader->SetSignalWeightExpression ("weight1*weight2");`
277  // - for background: `dataloader->SetBackgroundWeightExpression("weight1*weight2");`
278  dataloader->SetBackgroundWeightExpression( "weight" );
279 
280  // Apply additional cuts on the signal and background samples (can be different)
281  TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
282  TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";
283 
284  // Tell the dataloader how to use the training and testing events
285  //
286  // If no numbers of events are given, half of the events in the tree are used
287  // for training, and the other half for testing:
288  //
289  // dataloader->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
290  //
291  // To also specify the number of testing events, use:
292  //
293  // dataloader->PrepareTrainingAndTestTree( mycut,
294  // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
295  dataloader->PrepareTrainingAndTestTree( mycuts, mycutb,
296  "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V" );
297 
298  // ### Book MVA methods
299  //
300  // Please lookup the various method configuration options in the corresponding cxx files, eg:
301  // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
302  // it is possible to preset ranges in the option string in which the cut optimisation should be done:
303  // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
304 
305  // Cut optimisation
306  if (Use["Cuts"])
307  factory->BookMethod( dataloader, TMVA::Types::kCuts, "Cuts",
308  "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
309 
310  if (Use["CutsD"])
311  factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsD",
312  "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
313 
314  if (Use["CutsPCA"])
315  factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsPCA",
316  "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );
317 
318  if (Use["CutsGA"])
319  factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsGA",
320  "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
321 
322  if (Use["CutsSA"])
323  factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsSA",
324  "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
325 
326  // Likelihood ("naive Bayes estimator")
327  if (Use["Likelihood"])
328  factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "Likelihood",
329  "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
330 
331  // Decorrelated likelihood
332  if (Use["LikelihoodD"])
333  factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodD",
334  "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );
335 
336  // PCA-transformed likelihood
337  if (Use["LikelihoodPCA"])
338  factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodPCA",
339  "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
340 
341  // Use a kernel density estimator to approximate the PDFs
342  if (Use["LikelihoodKDE"])
343  factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodKDE",
344  "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
345 
346  // Use a variable-dependent mix of splines and kernel density estimator
347  if (Use["LikelihoodMIX"])
348  factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodMIX",
349  "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );
350 
351  // Test the multi-dimensional probability density estimator
352  // here are the options strings for the MinMax and RMS methods, respectively:
353  //
354  // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
355  // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
356  if (Use["PDERS"])
357  factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERS",
358  "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
359 
360  if (Use["PDERSD"])
361  factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERSD",
362  "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );
363 
364  if (Use["PDERSPCA"])
365  factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERSPCA",
366  "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );
367 
368  // Multi-dimensional likelihood estimator using self-adapting phase-space binning
369  if (Use["PDEFoam"])
370  factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam",
371  "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
372 
373  if (Use["PDEFoamBoost"])
374  factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoamBoost",
375  "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );
376 
377  // K-Nearest Neighbour classifier (KNN)
378  if (Use["KNN"])
379  factory->BookMethod( dataloader, TMVA::Types::kKNN, "KNN",
380  "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
381 
382  // H-Matrix (chi2-squared) method
383  if (Use["HMatrix"])
384  factory->BookMethod( dataloader, TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );
385 
386  // Linear discriminant (same as Fisher discriminant)
387  if (Use["LD"])
388  factory->BookMethod( dataloader, TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
389 
390  // Fisher discriminant (same as LD)
391  if (Use["Fisher"])
392  factory->BookMethod( dataloader, TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
393 
394  // Fisher with Gauss-transformed input variables
395  if (Use["FisherG"])
396  factory->BookMethod( dataloader, TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );
397 
398  // Composite classifier: ensemble (tree) of boosted Fisher classifiers
399  if (Use["BoostedFisher"])
400  factory->BookMethod( dataloader, TMVA::Types::kFisher, "BoostedFisher",
401  "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );
402 
403  // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
404  if (Use["FDA_MC"])
405  factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MC",
406  "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
407 
408  if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
409  factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA",
410  "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
411 
412  if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
413  factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_SA",
414  "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
415 
416  if (Use["FDA_MT"])
417  factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MT",
418  "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
419 
420  if (Use["FDA_GAMT"])
421  factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GAMT",
422  "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
423 
424  if (Use["FDA_MCMT"])
425  factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MCMT",
426  "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
427 
428  // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
429  if (Use["MLP"])
430  factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
431 
432  if (Use["MLPBFGS"])
433  factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );
434 
435  if (Use["MLPBNN"])
436  factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators
437 
438 
439  // improved neural network implementation
440  if (Use["DNN"])
441  {
442  /*
443  TString layoutString ("Layout=TANH|(N+100)*2,LINEAR");
444  TString layoutString ("Layout=SOFTSIGN|100,SOFTSIGN|50,SOFTSIGN|20,LINEAR");
445  TString layoutString ("Layout=RELU|300,RELU|100,RELU|30,RELU|10,LINEAR");
446  TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|30,SOFTSIGN|20,SOFTSIGN|10,LINEAR");
447  TString layoutString ("Layout=TANH|50,TANH|30,TANH|20,TANH|10,LINEAR");
448  TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|20,LINEAR");
449  */
450  TString layoutString ("Layout=TANH|100,TANH|50,TANH|10,LINEAR");
451 
452  TString training0 ("LearningRate=1e-1,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
453  TString training1 ("LearningRate=1e-2,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
454  TString training2 ("LearningRate=1e-2,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
455  TString training3 ("LearningRate=1e-3,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");
456 
457  TString trainingStrategyString ("TrainingStrategy=");
458  trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;
459 
460 
461  // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY");
462  TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM");
463  // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
464  nnOptions.Append (":"); nnOptions.Append (layoutString);
465  nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);
466 
467  factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN", nnOptions ); // NN
468  }
469 
470 
471 
472  // CF(Clermont-Ferrand)ANN
473  if (Use["CFMlpANN"])
474  factory->BookMethod( dataloader, TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...
475 
476  // Tmlp(Root)ANN
477  if (Use["TMlpANN"])
478  factory->BookMethod( dataloader, TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:...
479 
480  // Support Vector Machine
481  if (Use["SVM"])
482  factory->BookMethod( dataloader, TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
483 
484  // Boosted Decision Trees
485  if (Use["BDTG"]) // Gradient Boost
486  factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG",
487  "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );
488 
489  if (Use["BDT"]) // Adaptive Boost
490  factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT",
491  "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
492 
493  if (Use["BDTB"]) // Bagging
494  factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTB",
495  "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );
496 
497  if (Use["BDTD"]) // Decorrelation + Adaptive Boost
498  factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTD",
499  "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );
500 
501  if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
502  factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTMitFisher",
503  "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );
504 
505  // RuleFit -- TMVA implementation of Friedman's method
506  if (Use["RuleFit"])
507  factory->BookMethod( dataloader, TMVA::Types::kRuleFit, "RuleFit",
508  "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
509 
510  // For an example of the category classifier usage, see: TMVAClassificationCategory
511  //
512  // --------------------------------------------------------------------------------------------------
513  // Now you can optimize the setting (configuration) of the MVAs using the set of training events
514  // STILL EXPERIMENTAL and only implemented for BDT's !
515  //
516  // factory->OptimizeAllMethods("SigEffAt001","Scan");
517  // factory->OptimizeAllMethods("ROCIntegral","FitGA");
518  //
519  // --------------------------------------------------------------------------------------------------
520 
521  // Now you can tell the factory to train, test, and evaluate the MVAs
522  //
523  // Train MVAs using the set of training events
524  factory->TrainAllMethods();
525 
526  // Evaluate all MVAs using the set of test events
527  factory->TestAllMethods();
528 
529  // Evaluate and compare performance of all configured MVAs
530  factory->EvaluateAllMethods();
531 
532  // --------------------------------------------------------------
533 
534  // Save the output
535  outputFile->Close();
536 
537  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
538  std::cout << "==> TMVAClassification is done!" << std::endl;
539 
540  delete factory;
541  delete dataloader;
542  // Launch the GUI for the root macros
543  if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName );
544 
545  return 0;
546 }
547 
548 int main( int argc, char** argv )
549 {
550  // Select methods (don't look at this code - not of interest)
551  TString methodList;
552  for (int i=1; i<argc; i++) {
553  TString regMethod(argv[i]);
554  if(regMethod=="-b" || regMethod=="--batch") continue;
555  if (!methodList.IsNull()) methodList += TString(",");
556  methodList += regMethod;
557  }
558  return TMVAClassification(methodList);
559 }
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Definition: DataLoader.cxx:381
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1265
static Tools & Instance()
Definition: Tools.cxx:80
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Definition: Factory.cxx:337
void TMVAGui(const char *fName="TMVA.root", TString dataset="")
Definition: TMVAGui.cxx:52
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
Double_t background(Double_t *x, Double_t *par)
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
#define gROOT
Definition: TROOT.h:364
Basic string class.
Definition: TString.h:137
void TrainAllMethods()
iterates through all booked methods and calls training
Definition: Factory.cxx:822
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
Definition: DataLoader.cxx:455
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3871
Tools & gTools()
Definition: Tools.cxx:79
A specialized string object used for TTree selections.
Definition: TCut.h:27
R__EXTERN TSystem * gSystem
Definition: TSystem.h:549
void EvaluateAllMethods(void)
iterates over all MVAs that have been booked, and calls their evaluation methods
Definition: Factory.cxx:1058
void TestAllMethods()
Definition: Factory.cxx:957
unsigned int UInt_t
Definition: RtypesCore.h:42
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
virtual Int_t Exec(const char *shellcmd)
Execute a command.
Definition: TSystem.cxx:658
Bool_t IsNull() const
Definition: TString.h:387
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
Definition: DataLoader.cxx:579
double Double_t
Definition: RtypesCore.h:55
void SetBackgroundWeightExpression(const TString &variable)
Definition: DataLoader.cxx:512
A TTree object has a header with a name and a title.
Definition: TTree.h:98
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Definition: DataLoader.cxx:352
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at &#39;separator&#39; and fills the list &#39;splitV&#39; with the primitive strings ...
Definition: Tools.cxx:1207
int main(int argc, char **argv)
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
Definition: DataLoader.cxx:483
virtual void Close(Option_t *option="")
Close a file.
Definition: TFile.cxx:904