Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
TMVA_CNN_Classification.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_ml
3## \notebook
4## TMVA Classification Example Using a Convolutional Neural Network
5##
6## This is an example of using a CNN in TMVA. We do classification using a toy image data set
7## that is generated when running the example macro
8##
9## \macro_image
10## \macro_output
11## \macro_code
12##
13## \author Harshal Shende
14
15
16# TMVA Classification Example Using a Convolutional Neural Network
17
18
19## Helper function to create input images data
20## we create a signal and background 2D histograms from 2d gaussians
21## with a location (means in X and Y) different for each event
22## The difference between signal and background is in the gaussian width.
23## The width for the background gaussian is slightly larger than the signal width by few % values
24
25import importlib.util
26import os
27
28import ROOT
29
# Method switches, in order: TMVA CNN, Keras CNN, TMVA DNN, TMVA BDT, PyTorch CNN.
# Any switch missing from `opt` falls back to False.
opt = [1, 1, 1, 1, 1]
useTMVACNN, useKerasCNN, useTMVADNN, useTMVABDT, usePyTorchCNN = (opt + [False] * 5)[:5]
36
# Short aliases for the ROOT namespace and class used throughout the tutorial.
TMVA = ROOT.TMVA
TFile = ROOT.TFile

# Instantiate the TMVA Tools singleton before the factory and data loader are created.
TMVA.Tools.Instance()
41
def MakeImagesTree(n, nh, nw):
    """Generate the toy image data set and write it to a ROOT file.

    For each of the ``n`` events two 2D histograms with ``nh`` x ``nw`` bins are
    filled from 2D gaussians whose means are drawn uniformly in [3, 7].
    Gaussian noise is added to every bin and the flattened images are stored in
    the single ``vars`` branch (an ``std::vector<float>`` of size ``nh * nw``)
    of the trees ``sig_tree`` and ``bkg_tree``.

    Args:
        n: number of images generated for each tree.
        nh: number of histogram bins along X.
        nw: number of histogram bins along Y.

    The output file is named ``images_data_<nh>x<nw>.root``, i.e.
    ``images_data_16x16.root`` for the 16x16 images used by this tutorial.
    """
    # image size (nh x nw)
    ntot = nh * nw
    fileOutName = "images_data_{}x{}.root".format(nh, nw)
    nRndmEvts = 10000  # number of events we use to fill each image
    delta_sigma = 0.1  # absolute shift applied to the widths of the second gaussian
    pixelNoise = 5  # sigma of the gaussian noise added to each bin

    sX1 = 3
    sY1 = 3
    sX2 = sX1 + delta_sigma
    sY2 = sY1 - delta_sigma
    h1 = ROOT.TH2D("h1", "h1", nh, 0, 10, nw, 0, 10)
    h2 = ROOT.TH2D("h2", "h2", nh, 0, 10, nw, 0, 10)
    f1 = ROOT.TF2("f1", "xygaus")
    f2 = ROOT.TF2("f2", "xygaus")
    sgn = ROOT.TTree("sig_tree", "signal_tree")
    bkg = ROOT.TTree("bkg_tree", "background_tree")

    f = TFile(fileOutName, "RECREATE")
    x1 = ROOT.std.vector["float"](ntot)
    x2 = ROOT.std.vector["float"](ntot)

    # create signal and background trees with a single branch
    # an std::vector<float> of size nh x nw containing the image data
    # (x1, filled from h1, backs the background tree; x2, from h2, the signal tree)
    bkg.Branch("vars", "std::vector<float>", x1)
    sgn.Branch("vars", "std::vector<float>", x2)

    sgn.SetDirectory(f)
    bkg.SetDirectory(f)

    f1.SetParameters(1, 5, sX1, 5, sY1)
    f2.SetParameters(1, 5, sX2, 5, sY2)
    rng = ROOT.gRandom  # looked up once instead of for every pixel
    rng.SetSeed(0)
    ROOT.Info("TMVA_CNN_Classification", "Filling ROOT tree \n")
    for i in range(n):
        if i % 1000 == 0:
            print("Generating image event ...", i)

        h1.Reset()
        h2.Reset()
        # generate random means in range [3,7] to be not too much on the border
        f1.SetParameter(1, rng.Uniform(3, 7))
        f1.SetParameter(3, rng.Uniform(3, 7))
        f2.SetParameter(1, rng.Uniform(3, 7))
        f2.SetParameter(3, rng.Uniform(3, 7))

        h1.FillRandom(f1, nRndmEvts)
        h2.FillRandom(f2, nRndmEvts)

        for k in range(nh):
            for l in range(nw):
                # flattened pixel index; histogram bins are numbered from 1
                m = k * nw + l
                # add some noise in each bin
                x1[m] = h1.GetBinContent(k + 1, l + 1) + rng.Gaus(0, pixelNoise)
                x2[m] = h2.GetBinContent(k + 1, l + 1) + rng.Gaus(0, pixelNoise)

        sgn.Fill()
        bkg.Fill()

    sgn.Write()
    bkg.Write()

    # print() does not interpolate printf-style arguments, so format explicitly
    print("Signal and background tree with images data written to the file {}".format(f.GetName()))
    sgn.Print()
    bkg.Print()
    f.Close()
109
# Check which TMVA deep-learning back-ends are listed in this ROOT build's features.
hasGPU = "tmva-gpu" in ROOT.gROOT.GetConfigFeatures()
hasCPU = "tmva-cpu" in ROOT.gROOT.GetConfigFeatures()

nevt = 1000  # use a larger value to get better results

# Without either back-end the TMVA DNN and CNN methods are switched off.
if not (hasCPU or hasGPU):
    ROOT.Warning(
        "TMVA_CNN_Classification",
        "ROOT is not supporting tmva-cpu and tmva-gpu skip using TMVA-DNN and TMVA-CNN",
    )
    useTMVACNN = False
    useTMVADNN = False
119
# Without the tmva-pymva feature the Keras and PyTorch methods are switched off;
# otherwise the Python interpreter used by the PyMVA methods is initialized.
if "tmva-pymva" not in ROOT.gROOT.GetConfigFeatures():
    useKerasCNN = False
    usePyTorchCNN = False
else:
    TMVA.PyMethodBase.PyInitialize()
125
# Tell the user when the TMVA CNN will not be part of this run.
if not useTMVACNN:
    ROOT.Warning(
        "TMVA_CNN_Classification",
        "TMVA is not built with GPU or CPU multi-thread support. Cannot use TMVA Deep Learning for CNN",
    )
131
writeOutputFile = True

num_threads = 4  # use max 4 threads
max_epochs = 10  # maximum number of epochs used for training


# Enable implicit multi-threading when this ROOT build lists the "imt" feature.
if "imt" not in ROOT.gROOT.GetConfigFeatures():
    print("Running in serial mode since ROOT does not support MT")
else:
    ROOT.EnableImplicitMT(num_threads)
    ROOT.gSystem.Setenv("OMP_NUM_THREADS", "1")  # switch OFF MT in OpenBLAS
    print("Running with nthreads = {}".format(ROOT.GetThreadPoolSize()))
145
146
147
148
# The training results go to a ROOT file only when writing is requested;
# otherwise no output file is used.
outputFile = TFile.Open("TMVA_CNN_ClassificationOutput.root", "RECREATE") if writeOutputFile else None
152
153
154## Create TMVA Factory
155
156# Create the Factory class. Later you can choose the methods
157# whose performance you'd like to investigate.
158
159# The factory is the major TMVA object you have to interact with. Here is the list of parameters you need to pass
160
161# - The first argument is the base of the name of all the output
162# weight files in the directory weight/ that will be created with the
163# method parameters
164
165# - The second argument is the output file for the training results
166
167# - The third argument is a string option defining some general configuration for the TMVA session.
168# For example all TMVA output can be suppressed by removing the "!" (not) in front of the "Silent" argument in the
169# option string
170
171# - note that we disable any pre-transformation of the input variables and we avoid computing correlations between
172# input variables
173
174
175factory = TMVA.Factory(
176 "TMVA_CNN_Classification",
177 outputFile,
178 V=False,
179 ROC=True,
180 Silent=False,
181 Color=True,
182 AnalysisType="Classification",
183 Transformations=None,
184 Correlations=False,
185)
186
187
188## Declare DataLoader(s)
189
190# The next step is to declare the DataLoader class that deals with input variables
191
192# Define the input variables that shall be used for the MVA training
193# note that you may also use variable expressions, which can be parsed by TTree::Draw( "expression" )]
194
# In this case the input data consists of an image of 16x16 pixels. All pixels of an image are stored together in a
# single std::vector<float> branch of a ROOT TTree
196
197loader = TMVA.DataLoader("dataset")
198
199
200## Setup Dataset(s)
201
202# Define input data file and signal and background trees
203
204
imgSize = 16 * 16
inputFileName = "images_data_16x16.root"

# if the input file does not exist create it
# (the generation runs when AccessPathName returns a true value)
if ROOT.gSystem.AccessPathName(inputFileName):
    MakeImagesTree(nevt, 16, 16)

inputFile = TFile.Open(inputFileName)
# `inputFileName` is a Python str (it has no `.Data()`), and a failed open is not
# reported as `None`: test the handle's truthiness and zombie state instead,
# then stop, since everything below needs the input trees.
if not inputFile or inputFile.IsZombie():
    ROOT.Warning("TMVA_CNN_Classification", "Error opening input file " + inputFileName + " - exit")
    raise OSError("Error opening input file " + inputFileName)
215
216
217# inputFileName = "tmva_class_example.root"
218
219
220# --- Register the training and test trees
221
222signalTree = inputFile.Get("sig_tree")
223backgroundTree = inputFile.Get("bkg_tree")
224
225nEventsSig = signalTree.GetEntries()
226nEventsBkg = backgroundTree.GetEntries()
227
228# global event weights per tree (see below for setting event-wise weights)
229signalWeight = 1.0
230backgroundWeight = 1.0
231
232# You can add an arbitrary number of signal or background trees
233loader.AddSignalTree(signalTree, signalWeight)
234loader.AddBackgroundTree(backgroundTree, backgroundWeight)
235
236## add event variables (image)
237## use new method (from ROOT 6.20 to add a variable array for all image data)
238loader.AddVariablesArray("vars", imgSize)
239
240# Set individual event weights (the variables must exist in the original TTree)
241# for signal : factory->SetSignalWeightExpression ("weight1*weight2");
242# for background: factory->SetBackgroundWeightExpression("weight1*weight2");
243# loader->SetBackgroundWeightExpression( "weight" );
244
245# Apply additional cuts on the signal and background samples (can be different)
246mycuts = "" # for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
247mycutb = "" # for example: TCut mycutb = "abs(var1)<0.5";
248
249# Tell the factory how to use the training and testing events
250# If no numbers of events are given, half of the events in the tree are used
251# for training, and the other half for testing:
252# loader.PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
253# It is possible also to specify the number of training and testing events,
254# note we disable the computation of the correlation matrix of the input variables
255
256nTrainSig = 0.8 * nEventsSig
257nTrainBkg = 0.8 * nEventsBkg
258
259# build the string options for DataLoader::PrepareTrainingAndTestTree
260
261loader.PrepareTrainingAndTestTree(
262 mycuts,
263 mycutb,
264 nTrain_Signal=nTrainSig,
265 nTrain_Background=nTrainBkg,
266 SplitMode="Random",
267 SplitSeed=100,
268 NormMode="NumEvents",
269 V=False,
270 CalcCorrelations=False,
271)
272
273
274# DataSetInfo : [dataset] : Added class "Signal"
275# : Add Tree sig_tree of type Signal with 10000 events
276# DataSetInfo : [dataset] : Added class "Background"
277# : Add Tree bkg_tree of type Background with 10000 events
278
279# signalTree.Print();
280
281# Booking Methods
282
283# Here we book the TMVA methods. We book a Boosted Decision Tree method (BDT)
284
285
286# Boosted Decision Trees
287if useTMVABDT:
288 factory.BookMethod(
289 loader,
290 TMVA.Types.kBDT,
291 "BDT",
292 V=False,
293 NTrees=400,
294 MinNodeSize="2.5%",
295 MaxDepth=2,
296 BoostType="AdaBoost",
297 AdaBoostBeta=0.5,
298 UseBaggedBoost=True,
299 BaggedSampleFraction=0.5,
300 SeparationType="GiniIndex",
301 nCuts=20,
302 )
303
304
305#### Booking Deep Neural Network
306
307# Here we book the DNN of TMVA. See the example TMVA_Higgs_Classification.C for a detailed description of the
308# options
309
if useTMVADNN:
    # Network layout: dense RELU layers of 100 units (batch normalization after
    # the first three) followed by a single linear output unit.
    layoutString = ROOT.TString(
        "DENSE|100|RELU,BNORM,DENSE|100|RELU,BNORM,DENSE|100|RELU,BNORM,DENSE|100|RELU,DENSE|1|LINEAR"
    )

    # Training strategies
    # several training strings with different parameters (e.g. learning rates or
    # regularization parameters) can be chained using the `|` delimiter
    trainingString1 = ROOT.TString(
        "LearningRate=1e-3,Momentum=0.9,Repetitions=1,"
        "ConvergenceSteps=5,BatchSize=100,TestRepetitions=1,"
        "WeightDecay=1e-4,Regularization=None,"
        "Optimizer=ADAM,DropConfig=0.0+0.0+0.0+0."
    )  # + "|" + trainingString2 + ...
    trainingString1 += ",MaxEpochs=" + str(max_epochs)

    # Method name and architecture option: use the GPU if available, else the CPU
    if hasGPU:
        dnnOptions, dnnMethodName = "GPU", "TMVA_DNN_GPU"
    else:
        dnnOptions, dnnMethodName = "CPU", "TMVA_DNN_CPU"

    factory.BookMethod(
        loader,
        TMVA.Types.kDL,
        dnnMethodName,
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform=None,
        WeightInitialization="XAVIER",
        Layout=layoutString,
        TrainingStrategy=trainingString1,
        Architecture=dnnOptions,
    )
348
349
350### Book Convolutional Neural Network in TMVA
351
352# For building a CNN one needs to define
353
354# - Input Layout : number of channels (in this case = 1) | image height | image width
355# - Batch Layout : batch size | number of channels | image size = (height*width)
356
357# Then one add Convolutional layers and MaxPool layers.
358
359# - For Convolutional layer the option string has to be:
# - CONV | number of units | filter height | filter width | stride height | stride width | padding height | padding
# width | activation function
362
# - note in this case we are using a filter 3x3 and padding=1 and stride=1 so we get the output dimension of the
# conv layer equal to the input
365
366# - note we use after the first convolutional layer a batch normalization layer. This seems to help significantly the
367# convergence
368
369# - For the MaxPool layer:
370# - MAXPOOL | pool height | pool width | stride height | stride width
371
372# The RESHAPE layer is needed to flatten the output before the Dense layer
373
# Note that running the CNN requires CPU or GPU support
375
376
if useTMVACNN:
    # Training strategy (a single phase) with the epoch limit appended.
    trainingString1 = ROOT.TString(
        "LearningRate=1e-3,Momentum=0.9,Repetitions=1,"
        "ConvergenceSteps=5,BatchSize=100,TestRepetitions=1,"
        "WeightDecay=1e-4,Regularization=None,"
        "Optimizer=ADAM,DropConfig=0.0+0.0+0.0+0.0"
    )
    trainingString1 += ",MaxEpochs=" + str(max_epochs)

    ## New DL (CNN)
    # Method name and architecture option: use the GPU if available, else the CPU
    if hasGPU:
        cnnOptions, cnnMethodName = "GPU", "TMVA_CNN_GPU"
    else:
        cnnOptions, cnnMethodName = "CPU", "TMVA_CNN_CPU"

    factory.BookMethod(
        loader,
        TMVA.Types.kDL,
        cnnMethodName,
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform=None,
        WeightInitialization="XAVIER",
        InputLayout="1|16|16",
        Layout="CONV|10|3|3|1|1|1|1|RELU,BNORM,CONV|10|3|3|1|1|1|1|RELU,MAXPOOL|2|2|1|1,RESHAPE|FLAT,DENSE|100|RELU,DENSE|1|LINEAR",
        TrainingStrategy=trainingString1,
        Architecture=cnnOptions,
    )
409
410
### Book Convolutional Neural Networks in PyTorch and Keras using generated models
412
413
if usePyTorchCNN:
    ROOT.Info("TMVA_CNN_Classification", "Using Convolutional PyTorch Model")
    # Script defining the model, located in ROOT's tutorial directory; it is
    # handed to the method through the UserCode option.
    pyTorchFileName = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/PyTorch_Generate_CNN_Model.py"
    # check that pytorch can be imported and file defining the model exists
    torch_spec = importlib.util.find_spec("torch")
    if torch_spec is None or not os.path.exists(pyTorchFileName):
        ROOT.Warning(
            "TMVA_CNN_Classification",
            "PyTorch is not installed or model building file is not existing - skip using PyTorch",
        )
    else:
        ROOT.Info("TMVA_CNN_Classification", "Booking PyTorch CNN model")
        factory.BookMethod(
            loader,
            TMVA.Types.kPyTorch,
            "PyTorch",
            H=True,
            V=False,
            VarTransform=None,
            FilenameModel="PyTorchModelCNN.pt",
            FilenameTrainedModel="PyTorchTrainedModelCNN.pt",
            NumEpochs=max_epochs,
            BatchSize=100,
            UserCode=pyTorchFileName,
        )
443
if useKerasCNN:
    # Check that TensorFlow can be found before importing it (the same kind of
    # check the PyTorch section performs) so a missing package skips the method
    # instead of aborting the whole script.
    if importlib.util.find_spec("tensorflow") is None:
        ROOT.Warning(
            "TMVA_CNN_Classification",
            "TensorFlow is not installed - skip using Keras",
        )
    else:
        ROOT.Info("TMVA_CNN_Classification", "Building convolutional keras model")
        # create 2 conv2d layer + maxpool + dense
        from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D, Reshape
        from tensorflow.keras.models import Sequential
        from tensorflow.keras.optimizers import Adam

        model = Sequential()
        # the flat vector of 256 inputs is reshaped into a 16x16 single-channel image
        model.add(Reshape((16, 16, 1), input_shape=(256,)))
        model.add(
            Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same")
        )
        model.add(
            Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same")
        )
        # stride for maxpool is equal to pool size
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(64, activation="tanh"))
        # model.add(Dropout(0.2))
        model.add(Dense(2, activation="sigmoid"))
        model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"])
        model.save("model_cnn.keras")
        model.summary()

        if not os.path.exists("model_cnn.keras"):
            raise FileNotFoundError("Error creating Keras model file - skip using Keras")
        else:
            # book PyKeras method only if Keras model could be created
            ROOT.Info("TMVA_CNN_Classification", "Booking convolutional keras model")
            factory.BookMethod(
                loader,
                TMVA.Types.kPyKeras,
                "PyKeras",
                H=True,
                V=False,
                VarTransform=None,
                FilenameModel="model_cnn.keras",
                FilenameTrainedModel="trained_model_cnn.keras",
                NumEpochs=max_epochs,
                BatchSize=100,
                GpuOptions="allow_growth=True",
            )  # needed for RTX NVidia card and to avoid TF allocates all GPU memory
487
488
489
490## Train Methods
491
492factory.TrainAllMethods()
493
494## Test and Evaluate Methods
495
496factory.TestAllMethods()
497
498factory.EvaluateAllMethods()
499
500## Plot ROC Curve
501
# Retrieve the ROC curve canvas for the dataset and draw it.
c1 = factory.GetROCCurve(loader)
c1.Draw()

# close outputfile to save output file
# (outputFile stays None when writeOutputFile is False, so there is nothing to close)
if outputFile is not None:
    outputFile.Close()
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3787
This is the main MVA steering class.
Definition Factory.h:80
static void PyInitialize()
Initialize Python interpreter.
static Tools & Instance()
Definition Tools.cxx:72
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:613
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.
Definition TROOT.cxx:676