Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVA_SOFIE_Keras_HiggsModel.py
Go to the documentation of this file.
### \file
### \ingroup tutorial_ml
### \notebook -nodraw
### This macro runs the SOFIE parser on the Keras model
### obtained by running TMVA_Higgs_Classification.C.
### You need to run that macro before this one.
###
### \author Lorenzo Moneta
9
10
11import contextlib
12import warnings
13from os.path import exists
14
15import numpy as np
16import ROOT
17from keras import layers, models
18from sklearn.model_selection import train_test_split
19
20
@contextlib.contextmanager
def expect_warning(category, message):
    """Silence a known third-party warning and raise if it stops firing.

    Notifies us to drop the workaround once the upstream library is fixed.

    Every warning emitted inside the ``with`` body is recorded. Warnings whose
    category is a subclass of ``category`` and whose text contains ``message``
    are swallowed; every other recorded warning is re-issued after the body
    has finished so that unrelated diagnostics are not lost.

    Args:
        category: Warning class the expected warning must belong to
            (subclasses match as well).
        message: Substring that must occur in the expected warning's text.

    Raises:
        RuntimeError: If the body completes without emitting a matching
            warning.
    """
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning regardless of the filters currently installed.
        warnings.simplefilter("always")
        yield

    # The recording scope is closed here, so re-issued warnings go through the
    # caller's filters instead of being appended to ``caught`` again.
    seen = False
    for w in caught:
        if issubclass(w.category, category) and message in str(w.message):
            seen = True
        else:
            warnings.warn_explicit(w.message, w.category, w.filename, w.lineno)
    if not seen:
        raise RuntimeError(
            f"Expected {category.__name__} containing {message!r} was not "
            "emitted. This tutorial's workaround can probably be removed."
        )
41
42
def CreateModel(nlayers=4, nunits=64):
    """Build and compile a dense Keras classifier taking 7 input features.

    The network stacks ``nlayers - 1`` hidden ``Dense`` layers of ``nunits``
    ReLU units and finishes with a single sigmoid output unit. It is compiled
    with binary cross-entropy loss, the Adam optimizer and weighted accuracy.

    Args:
        nlayers: Total number of dense layers, counting the output layer.
        nunits: Number of units in each hidden layer.

    Returns:
        The compiled Keras model.
    """
    inputs = layers.Input(shape=(7,))

    hidden = inputs
    for _ in range(nlayers - 1):
        hidden = layers.Dense(nunits, activation="relu")(hidden)

    outputs = layers.Dense(1, activation="sigmoid")(hidden)

    net = models.Model(inputs, outputs)
    net.compile(loss="binary_crossentropy", optimizer="adam", weighted_metrics=["accuracy"])
    return net
55
56
def PrepareData():
    """Load the Higgs signal and background trees and split them 50/50.

    Reads seven columns from ``sig_tree`` and ``bkg_tree`` of the tutorial
    data file, labels signal rows 1 and background rows 0, and splits the
    concatenated sample with a fixed random seed.

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test)``.
    """
    variables = ("m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb")

    # location of the input data
    inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root"

    def load_tree(tree_name):
        # Read the requested columns and stack them into one
        # (nevents x nvars) array.
        arrays = ROOT.RDataFrame(tree_name, inputFile).AsNumpy(columns=list(variables))
        return np.column_stack(list(arrays.values()))

    signal = load_tree("sig_tree")
    n_signal = signal.shape[0]
    print("size of data", n_signal)

    background = load_tree("bkg_tree")
    n_background = background.shape[0]

    # signal rows are labelled 1, background rows 0
    features = np.concatenate((signal, background), axis=0)
    labels = np.concatenate((np.ones(n_signal), np.zeros(n_background)), axis=0)

    # split data in training and test data
    x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.50, random_state=1234)

    return x_train, y_train, x_test, y_test
86
87
def TrainModel(model, x, y, name):
    """Fit ``model`` on ``(x, y)`` and save it to ``<name>.keras``.

    Args:
        model: Compiled Keras model to train.
        x: Training inputs passed to ``model.fit``.
        y: Training targets passed to ``model.fit``.
        name: Base name of the output file; ``".keras"`` is appended.

    Returns:
        The tuple ``(model, modelFile)`` with the trained model and the path
        it was saved to.

    Raises:
        RuntimeError: On NumPy >= 2.0, if saving no longer emits the expected
            DeprecationWarning (raised by ``expect_warning``).
    """
    model.fit(x, y, epochs=5, batch_size=50)
    modelFile = name + ".keras"

    # Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet implement
    # the NumPy 2.0 ``__array__(copy=...)`` signature, so saving the model emits a
    # DeprecationWarning that we cannot fix from user code.
    numpy_version = tuple(int(p) for p in np.__version__.split(".")[:2])
    if numpy_version >= (2, 0):
        ctx = expect_warning(DeprecationWarning, "__array__ implementation doesn't accept a copy keyword")
    else:
        # Below NumPy 2.0 the warning is not expected, so save without the check.
        ctx = contextlib.nullcontext()

    with ctx:
        model.save(modelFile)

    return model, modelFile
104
105
def GenerateCode(modelFile="model.keras"):
    """Parse a saved Keras model with SOFIE and write its C++ inference code.

    Args:
        modelFile: Path of the saved ``.keras`` model file.

    Returns:
        ``modelFile`` with every ``".keras"`` occurrence removed; the caller
        appends ``".hxx"`` to obtain the generated header name.

    Raises:
        FileNotFoundError: If ``modelFile`` does not exist.
    """
    # check if the input file exists
    if not exists(modelFile):
        raise FileNotFoundError(
            "INput model file not existing. You need to run TMVA_Higgs_Classification.C to generate the Keras trained model"
        )

    # parse the input Keras model into RModel object (force batch size to be 1)
    rmodel = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile, 1)

    # Generating inference code
    rmodel.Generate()
    # Write the generated code to disk; NOTE(review): presumably this produces
    # "<model name>.hxx" in the working directory - confirm against ROOT docs.
    rmodel.OutputGenerated()

    modelName = modelFile.replace(".keras", "")
    return modelName
123
124
###################################################################
## Step 1 : Create and Train model
###################################################################

x_train, y_train, x_test, y_test = PrepareData()
# create dense model with 3 layers of 64 units
model = CreateModel(3, 64)
model, modelFile = TrainModel(model, x_train, y_train, "HiggsModel")

###################################################################
## Step 2 : Parse model and generate inference code with SOFIE
###################################################################

modelName = GenerateCode(modelFile)
modelHeaderFile = modelName + ".hxx"

###################################################################
## Step 3 : Compile the generated C++ model code
###################################################################

ROOT.gInterpreter.Declare('#include "' + modelHeaderFile + '"')

###################################################################
## Step 4: Evaluate the model
###################################################################

# get first the SOFIE session namespace
sofie = getattr(ROOT, "TMVA_SOFIE_" + modelName)
session = sofie.Session()

# Compare both back ends on one held-out event. The input is made a
# contiguous float32 vector of the 7 features; it is reshaped to a batch of
# one event for Keras below.
x = np.ascontiguousarray(x_test[0], dtype=np.float32)
y = session.infer(x)
ykeras = model(x.reshape(1, 7)).numpy()

print("input to model is ", x, "\n\t -> output using SOFIE = ", y[0], " using Keras = ", ykeras[0])

# The two implementations must agree within an absolute tolerance of 0.01.
if abs(y[0] - ykeras[0]) > 0.01:
    raise RuntimeError("ERROR: Result is different between SOFIE and Keras")

print("OK")
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...