Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVA_SOFIE_Models.py
Go to the documentation of this file.
1### \file
2### \ingroup tutorial_ml
3### \notebook -nodraw
4### Example of inference with SOFIE using a set of models trained with Keras.
5### This tutorial shows how to store several models in a single header file and
6### the weights in a ROOT binary file.
7### The models are then evaluated using the RDataFrame
8### First, generate the input model by running `TMVA_Higgs_Classification.C`.
9###
10### This tutorial parses the input model and runs the inference using ROOT's JITing capability.
11###
12### \macro_code
13### \macro_output
14### \author Lorenzo Moneta
15
16import contextlib
17import os
18import warnings
19
20import numpy as np
21import ROOT
22from sklearn.model_selection import train_test_split
23from tensorflow.keras.layers import Dense, Input
24from tensorflow.keras.models import Sequential
25from tensorflow.keras.optimizers import Adam
26
27
28@contextlib.contextmanager
29def expect_warning(category, message):
30 """Silence a known third-party warning. Raise if it stops firing.
31
32 Notifies us to drop the workaround once the upstream library is fixed.
33 """
34 with warnings.catch_warnings(record=True) as caught:
35 warnings.simplefilter("always")
36 yield
37 seen = False
38 for w in caught:
39 if issubclass(w.category, category) and message in str(w.message):
40 seen = True
41 else:
43 if not seen:
44 raise RuntimeError(
45 f"Expected {category.__name__} containing {message!r} was not "
46 "emitted. This tutorial's workaround can probably be removed."
47 )
48
49
50## generate and train Keras models with different architectures
51
52
53def CreateModel(nlayers=4, nunits=64):
54 model = Sequential()
55 model.add(Input(shape=(7,)))
56 model.add(Dense(nunits, activation="relu"))
57 for i in range(1, nlayers):
58 model.add(Dense(nunits, activation="relu"))
59
60 model.add(Dense(1, activation="sigmoid"))
61 model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"])
63 return model
64
65
66def PrepareData():
67 # get the input data
68 inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root"
69
70 df1 = ROOT.RDataFrame("sig_tree", inputFile)
71 sigData = df1.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"])
72 # print(sigData)
73
74 # stack all the 7 numpy array in a single array (nevents x nvars)
75 xsig = np.column_stack(list(sigData.values()))
76 data_sig_size = xsig.shape[0]
77 print("size of data", data_sig_size)
78
79 # make SOFIE inference on background data
80 df2 = ROOT.RDataFrame("bkg_tree", inputFile)
81 bkgData = df2.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"])
82 xbkg = np.column_stack(list(bkgData.values()))
83 data_bkg_size = xbkg.shape[0]
84
85 ysig = np.ones(data_sig_size)
86 ybkg = np.zeros(data_bkg_size)
87 inputs_data = np.concatenate((xsig, xbkg), axis=0)
88 inputs_targets = np.concatenate((ysig, ybkg), axis=0)
89
90 # split data in training and test data
91
92 x_train, x_test, y_train, y_test = train_test_split(inputs_data, inputs_targets, test_size=0.50, random_state=1234)
93
94 return x_train, y_train, x_test, y_test
95
96
97def TrainModel(model, x, y, name):
98 model.fit(x, y, epochs=5, batch_size=50)
99 modelFile = name + ".keras"
100 # Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet
101 # implement the NumPy 2.0 ``__array__(copy=...)`` signature, so saving
102 # emits a DeprecationWarning that we cannot fix from user code.
103 if tuple(int(p) for p in np.__version__.split(".")[:2]) >= (2, 0):
104 ctx = expect_warning(DeprecationWarning, "__array__ implementation doesn't accept a copy keyword")
105 else:
107
108 with ctx:
109 model.save(modelFile)
110 return modelFile
111
112
113### run the models
114
115x_train, y_train, x_test, y_test = PrepareData()
116
117## create models and train them
118
119model1 = TrainModel(CreateModel(4, 64), x_train, y_train, "Higgs_Model_4L_50")
120model2 = TrainModel(CreateModel(4, 64), x_train, y_train, "Higgs_Model_4L_200")
121model3 = TrainModel(CreateModel(4, 64), x_train, y_train, "Higgs_Model_2L_500")
122
123# evaluate with SOFIE the 3 trained models
124
125
126def GenerateModelCode(modelFile, generatedHeaderFile):
128
129 print("Generating inference code for the Keras model from ", modelFile, "in the header ", generatedHeaderFile)
130 # Generating inference code using a ROOT binary file
132 # add option to append to the same file the generated headers (pass True for append flag)
133 model.OutputGenerated(generatedHeaderFile, True)
134 # model.PrintGenerated()
135 return generatedHeaderFile
136
137
138generatedHeaderFile = "Higgs_Model.hxx"
139# need to remove existing header file since we are appending on same one
140if os.path.exists(generatedHeaderFile):
141 print("removing existing file", generatedHeaderFile)
142 os.remove(generatedHeaderFile)
143
144weightFile = "Higgs_Model.root"
145if os.path.exists(weightFile):
146 print("removing existing file", weightFile)
147 os.remove(weightFile)
148
149GenerateModelCode(model1, generatedHeaderFile)
150GenerateModelCode(model2, generatedHeaderFile)
151GenerateModelCode(model3, generatedHeaderFile)
152
153# compile the generated code
154
155ROOT.gInterpreter.Declare('#include "' + generatedHeaderFile + '"')
156
157
158# run the inference on the test data
159session1 = ROOT.TMVA_SOFIE_Higgs_Model_4L_50.Session("Higgs_Model.root")
160session2 = ROOT.TMVA_SOFIE_Higgs_Model_4L_200.Session("Higgs_Model.root")
161session3 = ROOT.TMVA_SOFIE_Higgs_Model_2L_500.Session("Higgs_Model.root")
162
163hs1 = ROOT.TH1D("hs1", "Signal result 4L 50", 100, 0, 1)
164hs2 = ROOT.TH1D("hs2", "Signal result 4L 200", 100, 0, 1)
165hs3 = ROOT.TH1D("hs3", "Signal result 2L 500", 100, 0, 1)
166
167hb1 = ROOT.TH1D("hb1", "Background result 4L 50", 100, 0, 1)
168hb2 = ROOT.TH1D("hb2", "Background result 4L 200", 100, 0, 1)
169hb3 = ROOT.TH1D("hb3", "Background result 2L 500", 100, 0, 1)
170
171
172def EvalModel(session, x):
173 result = session.infer(x)
174 return result[0]
175
176
177for i in range(0, x_test.shape[0]):
178 result1 = EvalModel(session1, x_test[i, :])
179 result2 = EvalModel(session2, x_test[i, :])
180 result3 = EvalModel(session3, x_test[i, :])
181 if y_test[i] == 1:
182 hs1.Fill(result1)
183 hs2.Fill(result2)
184 hs3.Fill(result3)
185 else:
186 hb1.Fill(result1)
187 hb2.Fill(result2)
188 hb3.Fill(result3)
189
190
191def PlotHistos(hs, hb):
192 hs.SetLineColor("kRed")
193 hb.SetLineColor("kBlue")
194 hs.Draw()
195 hb.Draw("same")
196
197
198c1 = ROOT.TCanvas()
199c1.Divide(1, 3)
200c1.cd(1)
201PlotHistos(hs1, hb1)
202c1.cd(2)
203PlotHistos(hs2, hb2)
204c1.cd(3)
205PlotHistos(hs3, hb3)
206c1.Draw()
207
208## draw also ROC curves
209
210
211def GetContent(h):
212 n = h.GetNbinsX()
213 x = ROOT.std.vector["float"](n)
214 w = ROOT.std.vector["float"](n)
215 for i in range(0, n):
216 x[i] = h.GetBinCenter(i + 1)
217 w[i] = h.GetBinContent(i + 1)
218 return x, w
219
220
221def MakeROCCurve(hs, hb):
222 xs, ws = GetContent(hs)
223 xb, wb = GetContent(hb)
224 roc = ROOT.TMVA.ROCCurve(xs, xb, ws, wb)
225 print("ROC integral for ", hs.GetName(), roc.GetROCIntegral())
226 curve = roc.GetROCCurve()
228 return roc, curve
229
230
231c2 = ROOT.TCanvas()
232
233r1, curve1 = MakeROCCurve(hs1, hb1)
235curve1.Draw("AC")
236
237r2, curve2 = MakeROCCurve(hs2, hb2)
238curve2.SetLineColor("kBlue")
239curve2.Draw("C")
240
241r3, curve3 = MakeROCCurve(hs3, hb3)
242curve3.SetLineColor("kGreen")
243curve3.Draw("C")
244
245c2.Draw()
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...