Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVA_CNN_Classification.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_tmva
3## \notebook
4## TMVA Classification Example Using a Convolutional Neural Network
5##
6## This is an example of using a CNN in TMVA. We do classification using a toy image data set
7## that is generated when running the example macro
8##
9## \macro_image
10## \macro_output
11## \macro_code
12##
13## \author Harshal Shende
14
15
16# TMVA Classification Example Using a Convolutional Neural Network
17
18
19## Helper function to create input images data
20## we create a signal and background 2D histograms from 2d gaussians
21## with a location (means in X and Y) different for each event
22## The difference between signal and background is in the gaussian width.
23## The width for the background gaussian is slightly larger than the signal width by few % values
24
25import ROOT
26
27import os
28import importlib.util
29
# --- Select which classifiers are run ---------------------------------------
# Default for the Keras CNN: enabled only when root-config reports that ROOT
# was built with PyMVA support.
useKerasCNN = False

if ROOT.gSystem.GetFromPipe("root-config --has-tmva-pymva") == "yes":
    useKerasCNN = True

# User switches, in order: TMVA CNN, Keras CNN, TMVA DNN, TMVA BDT, PyTorch CNN.
opt = [1, 1, 1, 1, 1]
useTMVACNN = opt[0] if len(opt) > 0 else False
# The user switch can only disable Keras; it must not re-enable it when the
# PyMVA detection above failed (previously opt[1] silently overrode it).
useKerasCNN = bool(opt[1] if len(opt) > 1 else True) and useKerasCNN
useTMVADNN = opt[2] if len(opt) > 2 else False
useTMVABDT = opt[3] if len(opt) > 3 else False
usePyTorchCNN = opt[4] if len(opt) > 4 else False

# Probe for tensorflow the same way torch is probed below, so that a missing
# package disables the method instead of aborting the script with ImportError.
if useKerasCNN:
    if importlib.util.find_spec("tensorflow") is None:
        useKerasCNN = False
        print("TMVA_CNN_Classificaton", "Skip using Keras since tensorflow is not installed")
    else:
        import tensorflow

# PyTorch has to be imported before ROOT to avoid crashes because of clashing
# std::regexp symbols that are exported by cppyy.
# See also: https://github.com/wlav/cppyy/issues/227
# NOTE(review): ROOT is already imported at the top of this file, so torch is
# not actually imported first here - confirm whether the ordering still matters.
torch_spec = importlib.util.find_spec("torch")
if torch_spec is None:
    usePyTorchCNN = False
    print("TMVA_CNN_Classificaton", "Skip using PyTorch since torch is not installed")
else:
    import torch
54
55
56import ROOT
57
58
# Short module-level aliases for the ROOT classes used throughout the script.
TMVA, TFile = ROOT.TMVA, ROOT.TFile
61
63
def MakeImagesTree(n, nh, nw):
    """Generate the toy image data set and write it to a ROOT file.

    For each of the ``n`` events two 2D histograms of ``nh`` x ``nw`` bins are
    filled from 2D gaussians ("xygaus") whose means are drawn at random and
    whose widths differ by ``delta_sigma``. The bin contents, smeared with
    gaussian pixel noise, are flattened into one ``std::vector<float>`` of
    ``nh * nw`` entries and stored in the branch "vars" of the trees
    "sig_tree" and "bkg_tree".

    Args:
        n: number of images (tree entries) generated per tree.
        nh: number of histogram bins along X (image height).
        nw: number of histogram bins along Y (image width).

    Output:
        The file ``images_data_<nh>x<nw>.root`` is (re)created in the current
        working directory. Nothing is returned.
    """
    # image size (nh x nw)
    ntot = nh * nw
    # Derive the name from the image size; for 16x16 this is the same
    # "images_data_16x16.root" that was hard-coded before.
    fileOutName = "images_data_{}x{}.root".format(nh, nw)
    nRndmEvts = 10000  # number of events we use to fill each image
    delta_sigma = 0.1  # absolute width shift, i.e. about 3% of the base width of 3
    pixelNoise = 5  # sigma of the gaussian noise added to every bin

    sX1 = 3
    sY1 = 3
    sX2 = sX1 + delta_sigma
    sY2 = sY1 - delta_sigma
    h1 = ROOT.TH2D("h1", "h1", nh, 0, 10, nw, 0, 10)
    h2 = ROOT.TH2D("h2", "h2", nh, 0, 10, nw, 0, 10)
    f1 = ROOT.TF2("f1", "xygaus")
    f2 = ROOT.TF2("f2", "xygaus")
    sgn = ROOT.TTree("sig_tree", "signal_tree")
    bkg = ROOT.TTree("bkg_tree", "background_tree")

    f = TFile(fileOutName, "RECREATE")
    x1 = ROOT.std.vector["float"](ntot)
    x2 = ROOT.std.vector["float"](ntot)

    # create signal and background trees with a single branch
    # an std::vector<float> of size nh x nw containing the image data
    # (x1, filled from h1/f1, feeds bkg_tree; x2, filled from h2/f2, feeds sig_tree)
    bkg.Branch("vars", "std::vector<float>", x1)
    sgn.Branch("vars", "std::vector<float>", x2)

    # The trees were created before the file was opened: attach them to it
    # explicitly so that they belong to the output file.
    sgn.SetDirectory(f)
    bkg.SetDirectory(f)

    # xygaus parameters: constant, mean X, sigma X, mean Y, sigma Y
    f1.SetParameters(1, 5, sX1, 5, sY1)
    f2.SetParameters(1, 5, sX2, 5, sY2)
    ROOT.Info("TMVA_CNN_Classification", "Filling ROOT tree \n")
    for i in range(n):
        if i % 1000 == 0:
            print("Generating image event ...", i)

        h1.Reset()
        h2.Reset()
        # generate random means in range [3,7] to be not too much on the border
        f1.SetParameter(1, ROOT.gRandom.Uniform(3, 7))
        f1.SetParameter(3, ROOT.gRandom.Uniform(3, 7))
        f2.SetParameter(1, ROOT.gRandom.Uniform(3, 7))
        f2.SetParameter(3, ROOT.gRandom.Uniform(3, 7))

        h1.FillRandom("f1", nRndmEvts)
        h2.FillRandom("f2", nRndmEvts)

        for k in range(nh):
            for j in range(nw):
                m = k * nw + j  # flattened pixel index
                # add some noise in each bin (histogram bins are 1-based)
                x1[m] = h1.GetBinContent(k + 1, j + 1) + ROOT.gRandom.Gaus(0, pixelNoise)
                x2[m] = h2.GetBinContent(k + 1, j + 1) + ROOT.gRandom.Gaus(0, pixelNoise)

        sgn.Fill()
        bkg.Fill()

    sgn.Write()
    bkg.Write()

    # print() does not interpolate "%s" itself, so format the message explicitly
    print("Signal and background tree with images data written to the file {}".format(f.GetName()))
    sgn.Print()
    bkg.Print()
    f.Close()
131
# Features this ROOT build was configured with decide which methods can run.
hasGPU = "tmva-gpu" in ROOT.gROOT.GetConfigFeatures()
hasCPU = "tmva-cpu" in ROOT.gROOT.GetConfigFeatures()

nevt = 1000  # use a larger value to get better results

# The TMVA deep-learning methods are disabled when neither backend is built.
if not hasCPU and not hasGPU:
    ROOT.Warning("TMVA_CNN_Classificaton","ROOT is not supporting tmva-cpu and tmva-gpu skip using TMVA-DNN and TMVA-CNN")
    useTMVACNN = False
    useTMVADNN = False

# The Python-based methods (Keras, PyTorch) need PyMVA; when it is available
# the embedded Python interface has to be initialised before booking them.
if "tmva-pymva" not in ROOT.gROOT.GetConfigFeatures():
    useKerasCNN = False
    usePyTorchCNN = False
else:
    TMVA.PyMethodBase.PyInitialize()

if not useTMVACNN:
    ROOT.Warning(
        "TMVA_CNN_Classificaton",
        "TMVA is not build with GPU or CPU multi-thread support. Cannot use TMVA Deep Learning for CNN",
    )
153
# Run configuration.
writeOutputFile = True

num_threads = 4  # use max 4 threads
max_epochs = 10  # maximum number of epochs used for training


# Enable implicit multi-threading when this ROOT build offers it.
if "imt" not in ROOT.gROOT.GetConfigFeatures():
    print("Running in serial mode since ROOT does not support MT")
else:
    ROOT.EnableImplicitMT(num_threads)
    ROOT.gSystem.Setenv("OMP_NUM_THREADS", "1")  # switch OFF MT in OpenBLAS
    print("Running with nthreads = " + str(ROOT.GetThreadPoolSize()))


# File receiving the training results; stays None when no output is requested.
outputFile = (
    TFile.Open("TMVA_CNN_ClassificationOutput.root", "RECREATE")
    if writeOutputFile
    else None
)
174
175
176## Create TMVA Factory
177
178# Create the Factory class. Later you can choose the methods
179# whose performance you'd like to investigate.
180
181# The factory is the major TMVA object you have to interact with. Here is the list of parameters you need to pass
182
183# - The first argument is the base of the name of all the output
184# weight files in the directory weight/ that will be created with the
185# method parameters
186
187# - The second argument is the output file for the training results
188
189# - The third argument is a string option defining some general configuration for the TMVA session.
190# For example all TMVA output can be suppressed by removing the "!" (not) in front of the "Silent" argument in the
191# option string
192
193# - note that we disable any pre-transformation of the input variables and we avoid computing correlations between
194# input variables
195
196
# Factory steering the whole TMVA session. The keyword options are passed in
# the same order as before: no input pre-transformation, no correlations.
factory = TMVA.Factory(
    "TMVA_CNN_Classification",
    outputFile,
    **{
        "V": False,
        "ROC": True,
        "Silent": False,
        "Color": True,
        "AnalysisType": "Classification",
        "Transformations": None,
        "Correlations": False,
    }
)
208
209
210## Declare DataLoader(s)
211
212# The next step is to declare the DataLoader class that deals with input variables
213
214# Define the input variables that shall be used for the MVA training
215# note that you may also use variable expressions, which can be parsed by TTree::Draw( "expression" )]
216
217# In this case the input data consists of an image of 16x16 pixels, stored as a single std::vector<float> branch ("vars") with 256 elements in a ROOT TTree
218
loader = TMVA.DataLoader("dataset")


## Setup Dataset(s)

# Define input data file and signal and background trees


imgSize = 16 * 16
inputFileName = "images_data_16x16.root"

# if the input file does not exist create it
# (the condition is true when the path can NOT be accessed)
if ROOT.gSystem.AccessPathName(inputFileName):
    MakeImagesTree(nevt, 16, 16)

inputFile = TFile.Open(inputFileName)
# A failed TFile.Open yields a null object, which is falsy but is not None,
# so test truthiness (and the zombie state) rather than "is None". Stop here
# with a clear error instead of failing on the Get() calls below.
if not inputFile or inputFile.IsZombie():
    raise OSError("Error opening input file {} - exit".format(inputFileName))


# inputFileName = "tmva_class_example.root"


# --- Register the training and test trees

signalTree = inputFile.Get("sig_tree")
backgroundTree = inputFile.Get("bkg_tree")

nEventsSig = signalTree.GetEntries()
nEventsBkg = backgroundTree.GetEntries()

# global event weights per tree (see below for setting event-wise weights)
signalWeight = 1.0
backgroundWeight = 1.0

# You can add an arbitrary number of signal or background trees
loader.AddSignalTree(signalTree, signalWeight)
loader.AddBackgroundTree(backgroundTree, backgroundWeight)

## add event variables (image)
## use new method (from ROOT 6.20 to add a variable array for all image data)
loader.AddVariablesArray("vars", imgSize)
261
262# Set individual event weights (the variables must exist in the original TTree)
263# for signal : factory->SetSignalWeightExpression ("weight1*weight2");
264# for background: factory->SetBackgroundWeightExpression("weight1*weight2");
265# loader->SetBackgroundWeightExpression( "weight" );
266
267# Apply additional cuts on the signal and background samples (can be different)
# Apply additional cuts on the signal and background samples (can be different)
mycuts = ""  # for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
mycutb = ""  # for example: TCut mycutb = "abs(var1)<0.5";

# Tell the factory how to use the training and testing events
# If no numbers of events are given, half of the events in the tree are used
# for training, and the other half for testing:
#    loader.PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
# It is possible also to specify the number of training and testing events,
# note we disable the computation of the correlation matrix of the input variables

# Use 80% of each sample for training. The values are event counts, so keep
# them integral instead of passing floats such as 800.0 into the options.
nTrainSig = int(0.8 * nEventsSig)
nTrainBkg = int(0.8 * nEventsBkg)

# build the string options for DataLoader::PrepareTrainingAndTestTree

loader.PrepareTrainingAndTestTree(
    mycuts,
    mycutb,
    nTrain_Signal=nTrainSig,
    nTrain_Background=nTrainBkg,
    SplitMode="Random",
    SplitSeed=100,
    NormMode="NumEvents",
    V=False,
    CalcCorrelations=False,
)
294
295
296# DataSetInfo : [dataset] : Added class "Signal"
297# : Add Tree sig_tree of type Signal with 10000 events
298# DataSetInfo : [dataset] : Added class "Background"
299# : Add Tree bkg_tree of type Background with 10000 events
300
301# signalTree.Print();
302
303# Booking Methods
304
305# Here we book the TMVA methods. We book a Boosted Decision Tree method (BDT)
306
307
308# Boosted Decision Trees
# Book the Boosted Decision Tree method, using the 256 pixel values as inputs.
if useTMVABDT:
    factory.BookMethod(
        loader,
        TMVA.Types.kBDT,
        "BDT",
        V=False,
        NTrees=400,
        MinNodeSize="2.5%",
        MaxDepth=2,
        BoostType="AdaBoost",
        AdaBoostBeta=0.5,
        UseBaggedBoost=True,
        BaggedSampleFraction=0.5,
        SeparationType="GiniIndex",
        nCuts=20,
    )
325
326
327#### Booking Deep Neural Network
328
329# Here we book the DNN of TMVA. See the example TMVA_Higgs_Classification.C for a detailed description of the
330# options
331
# Book the dense network: four DENSE|100|RELU layers, the first three each
# followed by batch normalisation, and a single linear output node.
if useTMVADNN:
    layoutString = ROOT.TString(
        "DENSE|100|RELU,BNORM,DENSE|100|RELU,BNORM,DENSE|100|RELU,BNORM,DENSE|100|RELU,DENSE|1|LINEAR"
    )

    # Training strategies
    # one can catenate several training strings with different parameters (e.g. learning rates or regularizations
    # parameters) The training string must be concatenated with the `|` delimiter
    trainingString1 = ROOT.TString(
        "LearningRate=1e-3,Momentum=0.9,Repetitions=1,"
        "ConvergenceSteps=5,BatchSize=100,TestRepetitions=1,"
        "WeightDecay=1e-4,Regularization=None,"
        "Optimizer=ADAM,DropConfig=0.0+0.0+0.0+0."
    )  # + "|" + trainingString2 + ...
    trainingString1 += ",MaxEpochs=" + str(max_epochs)

    # Build now the full DNN Option string
    dnnMethodName = "TMVA_DNN_CPU"

    # use GPU if available
    dnnOptions = "CPU"
    if hasGPU:
        dnnOptions = "GPU"
        dnnMethodName = "TMVA_DNN_GPU"

    factory.BookMethod(
        loader,
        TMVA.Types.kDL,
        dnnMethodName,
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform=None,
        WeightInitialization="XAVIER",
        Layout=layoutString,
        TrainingStrategy=trainingString1,
        Architecture=dnnOptions
    )
370
371
372### Book Convolutional Neural Network in TMVA
373
374# For building a CNN one needs to define
375
376# - Input Layout : number of channels (in this case = 1) | image height | image width
377# - Batch Layout : batch size | number of channels | image size = (height*width)
378
379# Then one add Convolutional layers and MaxPool layers.
380
381# - For Convolutional layer the option string has to be:
382# - CONV | number of units | filter height | filter width | stride height | stride width | padding height | paddig
383# width | activation function
384
385# - note in this case we are using a filer 3x3 and padding=1 and stride=1 so we get the output dimension of the
386# conv layer equal to the input
387
388# - note we use after the first convolutional layer a batch normalization layer. This seems to help significantly the
389# convergence
390
391# - For the MaxPool layer:
392# - MAXPOOL | pool height | pool width | stride height | stride width
393
394# The RESHAPE layer is needed to flatten the output before the Dense layer
395
396# Note that to run the CNN is required to have CPU or GPU support
397
398
# Book the TMVA CNN: two 3x3 convolutions (stride 1, padding 1) with a batch
# normalisation after the first, a 2x2 max-pool, then a flattened dense head.
if useTMVACNN:
    # Training strategies.
    trainingString1 = ROOT.TString(
        "LearningRate=1e-3,Momentum=0.9,Repetitions=1,"
        "ConvergenceSteps=5,BatchSize=100,TestRepetitions=1,"
        "WeightDecay=1e-4,Regularization=None,"
        "Optimizer=ADAM,DropConfig=0.0+0.0+0.0+0.0"
    )
    trainingString1 += ",MaxEpochs=" + str(max_epochs)

    ## New DL (CNN)
    cnnMethodName = "TMVA_CNN_CPU"
    cnnOptions = "CPU"
    # use GPU if available
    if hasGPU:
        cnnOptions = "GPU"
        cnnMethodName = "TMVA_CNN_GPU"

    factory.BookMethod(
        loader,
        TMVA.Types.kDL,
        cnnMethodName,
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform=None,
        WeightInitialization="XAVIER",
        InputLayout="1|16|16",
        Layout="CONV|10|3|3|1|1|1|1|RELU,BNORM,CONV|10|3|3|1|1|1|1|RELU,MAXPOOL|2|2|1|1,RESHAPE|FLAT,DENSE|100|RELU,DENSE|1|LINEAR",
        TrainingStrategy=trainingString1,
        Architecture=cnnOptions,
    )
431
432
### Book Convolutional Neural Network in PyTorch using a generated model


if usePyTorchCNN:
    ROOT.Info("TMVA_CNN_Classification", "Using Convolutional PyTorch Model")
    # The model is defined by a script shipped in the ROOT tutorials directory.
    pyTorchFileName = str(ROOT.gROOT.GetTutorialDir())
    pyTorchFileName += "/tmva/PyTorch_Generate_CNN_Model.py"
    # check that pytorch can be imported and file defining the model exists
    torch_spec = importlib.util.find_spec("torch")
    if torch_spec is not None and os.path.exists(pyTorchFileName):
        ROOT.Info("TMVA_CNN_Classification", "Booking PyTorch CNN model")
        factory.BookMethod(
            loader,
            TMVA.Types.kPyTorch,
            "PyTorch",
            H=True,
            V=False,
            VarTransform=None,
            FilenameModel="PyTorchModelCNN.pt",
            FilenameTrainedModel="PyTorchTrainedModelCNN.pt",
            NumEpochs=max_epochs,
            BatchSize=100,
            UserCode=str(pyTorchFileName)
        )
    else:
        ROOT.Warning(
            "TMVA_CNN_Classification",
            "PyTorch is not installed or model building file is not existing - skip using PyTorch",
        )
465
### Book Convolutional Neural Network in Keras using a generated model

if useKerasCNN:
    ROOT.Info("TMVA_CNN_Classification", "Building convolutional keras model")
    # create 2 conv2d layer + maxpool + dense
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Reshape

    model = Sequential()
    # the flat 256-element input vector is reshaped into a 16x16 one-channel image
    model.add(Reshape((16, 16, 1), input_shape=(256,)))
    model.add(Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same"))
    model.add(Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same"))
    # stride for maxpool is equal to pool size
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(64, activation="tanh"))
    # model.add(Dropout(0.2))
    model.add(Dense(2, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"])
    model.save("model_cnn.h5")

    if not os.path.exists("model_cnn.h5"):
        # Skip the method, as the message says and as the PyTorch branch does,
        # instead of aborting the whole script.
        ROOT.Warning("TMVA_CNN_Classification", "Error creating Keras model file - skip using Keras")
    else:
        # book PyKeras method only if Keras model could be created
        ROOT.Info("TMVA_CNN_Classification", "Booking convolutional keras model")
        factory.BookMethod(
            loader,
            TMVA.Types.kPyKeras,
            "PyKeras",
            H=True,
            V=False,
            VarTransform=None,
            FilenameModel="model_cnn.h5",
            FilenameTrainedModel="trained_model_cnn.h5",
            NumEpochs=max_epochs,
            BatchSize=100,
            GpuOptions="allow_growth=True",
        )  # needed for RTX NVidia card and to avoid TF allocates all GPU memory
511
512
513
## Train Methods

factory.TrainAllMethods()

## Test and Evaluate Methods

factory.TestAllMethods()

factory.EvaluateAllMethods()

## Plot ROC Curve

# GetROCCurve returns the canvas with the ROC curves of the booked methods.
c1 = factory.GetROCCurve(loader)
c1.Draw()
528
529# close outputfile to save output file
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
This is the main MVA steering class.
Definition Factory.h:80