Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVAClassification.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_tmva
3/// \notebook -nodraw
4/// This macro provides examples for the training and testing of the
5/// TMVA classifiers.
6///
7/// As input data is used a toy-MC sample consisting of four Gaussian-distributed
8/// and linearly correlated input variables.
9/// The methods to be used can be switched on and off by means of booleans, or
10/// via the prompt command, for example:
11///
12/// root -l ./TMVAClassification.C\‍(\"Fisher,Likelihood\"\‍)
13///
14/// (note that the backslashes are mandatory)
15/// If no method given, a default set of classifiers is used.
16/// The output file "TMVA.root" can be analysed with the use of dedicated
17/// macros (simply say: root -l <macro.C>), which can be conveniently
18/// invoked through a GUI that will appear at the end of the run of this macro.
19/// Launch the GUI via the command:
20///
21/// root -l ./TMVAGui.C
22///
23/// You can also compile and run the example with the following commands
24///
25/// make
26/// ./TMVAClassification <Methods>
27///
28/// where: `<Methods> = "method1 method2"` are the TMVA classifier names
29/// example:
30///
31/// ./TMVAClassification Fisher LikelihoodPCA BDT
32///
33/// If no method given, a default set is of classifiers is used
34///
35/// - Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis
36/// - Package : TMVA
37/// - Root Macro: TMVAClassification
38///
39/// \macro_output
40/// \macro_code
41/// \author Andreas Hoecker
42
43
44#include <cstdlib>
45#include <iostream>
46#include <map>
47#include <string>
48
49#include "TChain.h"
50#include "TFile.h"
51#include "TTree.h"
52#include "TString.h"
53#include "TObjString.h"
54#include "TSystem.h"
55#include "TROOT.h"
56
57#include "TMVA/Factory.h"
58#include "TMVA/DataLoader.h"
59#include "TMVA/Tools.h"
60#include "TMVA/TMVAGui.h"
61
62int TMVAClassification( TString myMethodList = "" )
63{
64 // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
65 // if you use your private .rootrc, or run from a different directory, please copy the
66 // corresponding lines from .rootrc
67
68 // Methods to be processed can be given as an argument; use format:
69 //
70 // mylinux~> root -l TMVAClassification.C\‍(\"myMethod1,myMethod2,myMethod3\"\‍)
71
72 //---------------------------------------------------------------
73 // This loads the library
75
76 // Default MVA methods to be trained + tested
77 std::map<std::string,int> Use;
78
79 // Cut optimisation
80 Use["Cuts"] = 1;
81 Use["CutsD"] = 1;
82 Use["CutsPCA"] = 0;
83 Use["CutsGA"] = 0;
84 Use["CutsSA"] = 0;
85 //
86 // 1-dimensional likelihood ("naive Bayes estimator")
87 Use["Likelihood"] = 1;
88 Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings)
89 Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
90 Use["LikelihoodKDE"] = 0;
91 Use["LikelihoodMIX"] = 0;
92 //
93 // Mutidimensional likelihood and Nearest-Neighbour methods
94 Use["PDERS"] = 1;
95 Use["PDERSD"] = 0;
96 Use["PDERSPCA"] = 0;
97 Use["PDEFoam"] = 1;
98 Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting
99 Use["KNN"] = 1; // k-nearest neighbour method
100 //
101 // Linear Discriminant Analysis
102 Use["LD"] = 1; // Linear Discriminant identical to Fisher
103 Use["Fisher"] = 0;
104 Use["FisherG"] = 0;
105 Use["BoostedFisher"] = 0; // uses generalised MVA method boosting
106 Use["HMatrix"] = 0;
107 //
108 // Function Discriminant analysis
109 Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetics Algorithm
110 Use["FDA_SA"] = 0;
111 Use["FDA_MC"] = 0;
112 Use["FDA_MT"] = 0;
113 Use["FDA_GAMT"] = 0;
114 Use["FDA_MCMT"] = 0;
115 //
116 // Neural Networks (all are feed-forward Multilayer Perceptrons)
117 Use["MLP"] = 0; // Recommended ANN
118 Use["MLPBFGS"] = 0; // Recommended ANN with optional training method
119 Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator
120 Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH
121 Use["TMlpANN"] = 0; // ROOT's own ANN
122#ifdef R__HAS_TMVAGPU
123 Use["DNN_GPU"] = 1; // CUDA-accelerated DNN training.
124#else
125 Use["DNN_GPU"] = 0;
126#endif
127
128#ifdef R__HAS_TMVACPU
129 Use["DNN_CPU"] = 1; // Multi-core accelerated DNN.
130#else
131 Use["DNN_CPU"] = 0;
132#endif
133 //
134 // Support Vector Machine
135 Use["SVM"] = 1;
136 //
137 // Boosted Decision Trees
138 Use["BDT"] = 1; // uses Adaptive Boost
139 Use["BDTG"] = 0; // uses Gradient Boost
140 Use["BDTB"] = 0; // uses Bagging
141 Use["BDTD"] = 0; // decorrelation + Adaptive Boost
142 Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting
143 //
144 // Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
145 Use["RuleFit"] = 1;
146 // ---------------------------------------------------------------
147
148 std::cout << std::endl;
149 std::cout << "==> Start TMVAClassification" << std::endl;
150
151 // Select methods (don't look at this code - not of interest)
152 if (myMethodList != "") {
153 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
154
155 std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
156 for (UInt_t i=0; i<mlist.size(); i++) {
157 std::string regMethod(mlist[i]);
158
159 if (Use.find(regMethod) == Use.end()) {
160 std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
161 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
162 std::cout << std::endl;
163 return 1;
164 }
165 Use[regMethod] = 1;
166 }
167 }
168
169 // --------------------------------------------------------------------------------------------------
170
171 // Here the preparation phase begins
172
173 // Read training and test data
174 // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
175 TFile *input(0);
176 TString fname = "./tmva_class_example.root";
177 if (!gSystem->AccessPathName( fname )) {
178 input = TFile::Open( fname ); // check if file in local directory exists
179 }
180 else {
182 input = TFile::Open("http://root.cern.ch/files/tmva_class_example.root", "CACHEREAD");
183 }
184 if (!input) {
185 std::cout << "ERROR: could not open data file" << std::endl;
186 exit(1);
187 }
188 std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl;
189
190 // Register the training and test trees
191
192 TTree *signalTree = (TTree*)input->Get("TreeS");
193 TTree *background = (TTree*)input->Get("TreeB");
194
195 // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
196 TString outfileName( "TMVA.root" );
197 TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
198
199 // Create the factory object. Later you can choose the methods
200 // whose performance you'd like to investigate. The factory is
201 // the only TMVA object you have to interact with
202 //
203 // The first argument is the base of the name of all the
204 // weightfiles in the directory weight/
205 //
206 // The second argument is the output file for the training results
207 // All TMVA output can be suppressed by removing the "!" (not) in
208 // front of the "Silent" argument in the option string
209 TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
210 "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
211
212 TMVA::DataLoader *dataloader=new TMVA::DataLoader("dataset");
213 // If you wish to modify default settings
214 // (please check "src/Config.h" to see all available global options)
215 //
216 // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
217 // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
218
219 // Define the input variables that shall be used for the MVA training
220 // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
221 // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
222 dataloader->AddVariable( "myvar1 := var1+var2", 'F' );
223 dataloader->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
224 dataloader->AddVariable( "var3", "Variable 3", "units", 'F' );
225 dataloader->AddVariable( "var4", "Variable 4", "units", 'F' );
226
227 // You can add so-called "Spectator variables", which are not used in the MVA training,
228 // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
229 // input variables, the response values of all trained MVAs, and the spectator variables
230
231 dataloader->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );
232 dataloader->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' );
233
234
235 // global event weights per tree (see below for setting event-wise weights)
236 Double_t signalWeight = 1.0;
237 Double_t backgroundWeight = 1.0;
238
239 // You can add an arbitrary number of signal or background trees
240 dataloader->AddSignalTree ( signalTree, signalWeight );
241 dataloader->AddBackgroundTree( background, backgroundWeight );
242
243 // To give different trees for training and testing, do as follows:
244 //
245 // dataloader->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
246 // dataloader->AddSignalTree( signalTestTree, signalTestWeight, "Test" );
247
248 // Use the following code instead of the above two or four lines to add signal and background
249 // training and test events "by hand"
250 // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
251 // variable definition, but simply compute the expression before adding the event
252 // ```cpp
253 // // --- begin ----------------------------------------------------------
254 // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
255 // Float_t treevars[4], weight;
256 //
257 // // Signal
258 // for (UInt_t ivar=0; ivar<4; ivar++) signalTree->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
259 // for (UInt_t i=0; i<signalTree->GetEntries(); i++) {
260 // signalTree->GetEntry(i);
261 // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
262 // // add training and test events; here: first half is training, second is testing
263 // // note that the weight can also be event-wise
264 // if (i < signalTree->GetEntries()/2.0) dataloader->AddSignalTrainingEvent( vars, signalWeight );
265 // else dataloader->AddSignalTestEvent ( vars, signalWeight );
266 // }
267 //
268 // // Background (has event weights)
269 // background->SetBranchAddress( "weight", &weight );
270 // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
271 // for (UInt_t i=0; i<background->GetEntries(); i++) {
272 // background->GetEntry(i);
273 // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
274 // // add training and test events; here: first half is training, second is testing
275 // // note that the weight can also be event-wise
276 // if (i < background->GetEntries()/2) dataloader->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
277 // else dataloader->AddBackgroundTestEvent ( vars, backgroundWeight*weight );
278 // }
279 // // --- end ------------------------------------------------------------
280 // ```
281 // End of tree registration
282
283 // Set individual event weights (the variables must exist in the original TTree)
284 // - for signal : `dataloader->SetSignalWeightExpression ("weight1*weight2");`
285 // - for background: `dataloader->SetBackgroundWeightExpression("weight1*weight2");`
286 dataloader->SetBackgroundWeightExpression( "weight" );
287
288 // Apply additional cuts on the signal and background samples (can be different)
289 TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
290 TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";
291
292 // Tell the dataloader how to use the training and testing events
293 //
294 // If no numbers of events are given, half of the events in the tree are used
295 // for training, and the other half for testing:
296 //
297 // dataloader->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
298 //
299 // To also specify the number of testing events, use:
300 //
301 // dataloader->PrepareTrainingAndTestTree( mycut,
302 // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
303 dataloader->PrepareTrainingAndTestTree( mycuts, mycutb,
304 "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V" );
305
306 // ### Book MVA methods
307 //
308 // Please lookup the various method configuration options in the corresponding cxx files, eg:
309 // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
310 // it is possible to preset ranges in the option string in which the cut optimisation should be done:
311 // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
312
313 // Cut optimisation
314 if (Use["Cuts"])
315 factory->BookMethod( dataloader, TMVA::Types::kCuts, "Cuts",
316 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
317
318 if (Use["CutsD"])
319 factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsD",
320 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
321
322 if (Use["CutsPCA"])
323 factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsPCA",
324 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );
325
326 if (Use["CutsGA"])
327 factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsGA",
328 "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
329
330 if (Use["CutsSA"])
331 factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsSA",
332 "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
333
334 // Likelihood ("naive Bayes estimator")
335 if (Use["Likelihood"])
336 factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "Likelihood",
337 "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
338
339 // Decorrelated likelihood
340 if (Use["LikelihoodD"])
341 factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodD",
342 "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );
343
344 // PCA-transformed likelihood
345 if (Use["LikelihoodPCA"])
346 factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodPCA",
347 "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
348
349 // Use a kernel density estimator to approximate the PDFs
350 if (Use["LikelihoodKDE"])
351 factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodKDE",
352 "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
353
354 // Use a variable-dependent mix of splines and kernel density estimator
355 if (Use["LikelihoodMIX"])
356 factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodMIX",
357 "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );
358
359 // Test the multi-dimensional probability density estimator
360 // here are the options strings for the MinMax and RMS methods, respectively:
361 //
362 // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
363 // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
364 if (Use["PDERS"])
365 factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERS",
366 "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
367
368 if (Use["PDERSD"])
369 factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERSD",
370 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );
371
372 if (Use["PDERSPCA"])
373 factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERSPCA",
374 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );
375
376 // Multi-dimensional likelihood estimator using self-adapting phase-space binning
377 if (Use["PDEFoam"])
378 factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam",
379 "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
380
381 if (Use["PDEFoamBoost"])
382 factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoamBoost",
383 "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );
384
385 // K-Nearest Neighbour classifier (KNN)
386 if (Use["KNN"])
387 factory->BookMethod( dataloader, TMVA::Types::kKNN, "KNN",
388 "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
389
390 // H-Matrix (chi2-squared) method
391 if (Use["HMatrix"])
392 factory->BookMethod( dataloader, TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );
393
394 // Linear discriminant (same as Fisher discriminant)
395 if (Use["LD"])
396 factory->BookMethod( dataloader, TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
397
398 // Fisher discriminant (same as LD)
399 if (Use["Fisher"])
400 factory->BookMethod( dataloader, TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
401
402 // Fisher with Gauss-transformed input variables
403 if (Use["FisherG"])
404 factory->BookMethod( dataloader, TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );
405
406 // Composite classifier: ensemble (tree) of boosted Fisher classifiers
407 if (Use["BoostedFisher"])
408 factory->BookMethod( dataloader, TMVA::Types::kFisher, "BoostedFisher",
409 "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );
410
411 // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
412 if (Use["FDA_MC"])
413 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MC",
414 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
415
416 if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
417 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA",
418 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=2:Steps=5:Trim=True:SaveBestGen=1" );
419
420 if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
421 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_SA",
422 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
423
424 if (Use["FDA_MT"])
425 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MT",
426 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
427
428 if (Use["FDA_GAMT"])
429 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GAMT",
430 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
431
432 if (Use["FDA_MCMT"])
433 factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MCMT",
434 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
435
436 // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
437 if (Use["MLP"])
438 factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
439
440 if (Use["MLPBFGS"])
441 factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );
442
443 if (Use["MLPBNN"])
444 factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators
445
446
447 // Multi-architecture DNN implementation.
448 if (Use["DNN_CPU"] or Use["DNN_GPU"]) {
449 // General layout.
450 TString layoutString ("Layout=TANH|128,TANH|128,TANH|128,LINEAR");
451
452 // Define Training strategy. One could define multiple strategy string separated by the "|" delimiter
453
454 TString trainingStrategyString = ("TrainingStrategy=LearningRate=1e-2,Momentum=0.9,"
455 "ConvergenceSteps=20,BatchSize=100,TestRepetitions=1,"
456 "WeightDecay=1e-4,Regularization=None,"
457 "DropConfig=0.0+0.5+0.5+0.5");
458
459 // General Options.
460 TString dnnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
461 "WeightInitialization=XAVIERUNIFORM");
462 dnnOptions.Append (":"); dnnOptions.Append (layoutString);
463 dnnOptions.Append (":"); dnnOptions.Append (trainingStrategyString);
464
465 // Cuda implementation.
466 if (Use["DNN_GPU"]) {
467 TString gpuOptions = dnnOptions + ":Architecture=GPU";
468 factory->BookMethod(dataloader, TMVA::Types::kDL, "DNN_GPU", gpuOptions);
469 }
470 // Multi-core CPU implementation.
471 if (Use["DNN_CPU"]) {
472 TString cpuOptions = dnnOptions + ":Architecture=CPU";
473 factory->BookMethod(dataloader, TMVA::Types::kDL, "DNN_CPU", cpuOptions);
474 }
475 }
476
477 // CF(Clermont-Ferrand)ANN
478 if (Use["CFMlpANN"])
479 factory->BookMethod( dataloader, TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...
480
481 // Tmlp(Root)ANN
482 if (Use["TMlpANN"])
483 factory->BookMethod( dataloader, TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:...
484
485 // Support Vector Machine
486 if (Use["SVM"])
487 factory->BookMethod( dataloader, TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
488
489 // Boosted Decision Trees
490 if (Use["BDTG"]) // Gradient Boost
491 factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG",
492 "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );
493
494 if (Use["BDT"]) // Adaptive Boost
495 factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT",
496 "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
497
498 if (Use["BDTB"]) // Bagging
499 factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTB",
500 "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );
501
502 if (Use["BDTD"]) // Decorrelation + Adaptive Boost
503 factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTD",
504 "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );
505
506 if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
507 factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTF",
508 "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );
509
510 // RuleFit -- TMVA implementation of Friedman's method
511 if (Use["RuleFit"])
512 factory->BookMethod( dataloader, TMVA::Types::kRuleFit, "RuleFit",
513 "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
514
515 // For an example of the category classifier usage, see: TMVAClassificationCategory
516 //
517 // --------------------------------------------------------------------------------------------------
518 // Now you can optimize the setting (configuration) of the MVAs using the set of training events
519 // STILL EXPERIMENTAL and only implemented for BDT's !
520 //
521 // factory->OptimizeAllMethods("SigEffAtBkg0.01","Scan");
522 // factory->OptimizeAllMethods("ROCIntegral","FitGA");
523 //
524 // --------------------------------------------------------------------------------------------------
525
526 // Now you can tell the factory to train, test, and evaluate the MVAs
527 //
528 // Train MVAs using the set of training events
529 factory->TrainAllMethods();
530
531 // Evaluate all MVAs using the set of test events
532 factory->TestAllMethods();
533
534 // Evaluate and compare performance of all configured MVAs
535 factory->EvaluateAllMethods();
536
537 // --------------------------------------------------------------
538
539 // Save the output
540 outputFile->Close();
541
542 std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
543 std::cout << "==> TMVAClassification is done!" << std::endl;
544
545 delete factory;
546 delete dataloader;
547 // Launch the GUI for the root macros
548 if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName );
549
550 return 0;
551}
552
553int main( int argc, char** argv )
554{
555 // Select methods (don't look at this code - not of interest)
556 TString methodList;
557 for (int i=1; i<argc; i++) {
558 TString regMethod(argv[i]);
559 if(regMethod=="-b" || regMethod=="--batch") continue;
560 if (!methodList.IsNull()) methodList += TString(",");
561 methodList += regMethod;
562 }
563 return TMVAClassification(methodList);
564}
int main()
Definition Prototype.cxx:12
unsigned int UInt_t
Definition RtypesCore.h:46
double Double_t
Definition RtypesCore.h:59
#define gROOT
Definition TROOT.h:404
R__EXTERN TSystem * gSystem
Definition TSystem.h:559
A specialized string object used for TTree selections.
Definition TCut.h:25
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:54
static Bool_t SetCacheFileDir(ROOT::Internal::TStringView cacheDir, Bool_t operateDisconnected=kTRUE, Bool_t forceCacheread=kFALSE)
Definition TFile.h:326
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4025
void Close(Option_t *option="") override
Close a file.
Definition TFile.cxx:899
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
void SetBackgroundWeightExpression(const TString &variable)
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
This is the main MVA steering class.
Definition Factory.h:80
void TrainAllMethods()
Iterates through all booked methods and calls training.
Definition Factory.cxx:1114
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition Factory.cxx:352
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponi...
Definition Factory.cxx:1271
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
Definition Factory.cxx:1376
static Tools & Instance()
Definition Tools.cxx:71
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings
Definition Tools.cxx:1199
@ kFisher
Definition Types.h:82
@ kTMlpANN
Definition Types.h:85
@ kPDEFoam
Definition Types.h:94
@ kLikelihood
Definition Types.h:79
@ kHMatrix
Definition Types.h:81
@ kRuleFit
Definition Types.h:88
@ kCFMlpANN
Definition Types.h:84
virtual const char * GetName() const
Returns name of object.
Definition TNamed.h:47
Basic string class.
Definition TString.h:136
Bool_t IsNull() const
Definition TString.h:407
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition TSystem.cxx:1296
A TTree represents a columnar dataset.
Definition TTree.h:79
Tools & gTools()
void TMVAGui(const char *fName="TMVA.root", TString dataset="")