Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVA_SOFIE_Models.py
Go to the documentation of this file.
1### \file
2### \ingroup tutorial_tmva
3### \notebook -nodraw
4### Example of inference with SOFIE using a set of models trained with Keras.
5### This tutorial shows how to store several models in a single header file and
6### the weights in a ROOT binary file.
7### The models are then evaluated using RDataFrame.
8### First, generate the input model by running `TMVA_Higgs_Classification.C`.
9###
10### This tutorial parses the input model and runs the inference using ROOT's JITing capability.
11###
12### \macro_code
13### \macro_output
14### \author Lorenzo Moneta
15
16import ROOT
17from os.path import exists
18
19ROOT.TMVA.PyMethodBase.PyInitialize()
20
21
22## generate and train Keras models with different architectures
23
24import numpy as np
25from tensorflow.keras.models import Sequential
26from tensorflow.keras.layers import Dense
27from tensorflow.keras.optimizers import Adam
28
29from sklearn.model_selection import train_test_split
30
def CreateModel(nlayers = 4, nunits = 64):
    """Build and compile a dense Keras binary classifier with 7 input features.

    The network has `nlayers` hidden ReLU layers of `nunits` units each,
    followed by a single sigmoid output unit. It is compiled with the
    binary cross-entropy loss and the Adam optimizer (learning rate 0.001),
    its summary is printed, and the compiled model is returned.
    """
    # the first hidden layer is the one that declares the input dimension
    hiddenLayers = [Dense(nunits, activation='relu',input_dim=7)]
    hiddenLayers += [Dense(nunits, activation='relu') for _ in range(nlayers - 1)]

    model = Sequential()
    for layer in hiddenLayers:
        model.add(layer)
    model.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(learning_rate = 0.001)
    model.compile(loss = 'binary_crossentropy', optimizer = optimizer, weighted_metrics = ['accuracy'])
    model.summary()
    return model
41
def PrepareData() :
    """Read the Higgs signal and background trees and split them for training.

    The seven mass variables are read from "sig_tree" and "bkg_tree" of the
    remote file Higgs_data.root. Signal events are labelled 1 and background
    events 0. The combined sample is split in half with a fixed random seed.

    Returns:
        x_train, y_train, x_test, y_test
    """
    inputFileName = "Higgs_data.root"
    inputFile = "http://root.cern.ch/files/" + inputFileName
    columns = ['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']

    def LoadTree(treeName):
        # read the columns as numpy arrays and stack them
        # into a single array (nevents x nvars)
        df = ROOT.RDataFrame(treeName, inputFile)
        arrays = df.AsNumpy(columns=columns)
        return np.column_stack(list(arrays.values()))

    xsig = LoadTree("sig_tree")
    print("size of data", xsig.shape[0])

    xbkg = LoadTree("bkg_tree")

    # targets: 1 for every signal event, 0 for every background event
    inputs_targets = np.concatenate((np.ones(xsig.shape[0]), np.zeros(xbkg.shape[0])), axis=0)
    inputs_data = np.concatenate((xsig, xbkg), axis=0)

    # split data in training and test data
    x_train, x_test, y_train, y_test = train_test_split(
        inputs_data, inputs_targets, test_size=0.50, random_state=1234)

    return x_train, y_train, x_test, y_test
73
def TrainModel(model, x, y, name) :
    """Fit `model` on (x, y), save it as '<name>.h5' and return that file name.

    Training runs for 10 epochs with a batch size of 50.
    """
    model.fit(x, y, batch_size=50, epochs=10)
    savedPath = name + '.h5'
    model.save(savedPath)
    return savedPath
79
80### run the models
81
x_train, y_train, x_test, y_test = PrepareData()

## create models and train them
# The (nlayers, nunits) arguments must agree with the architecture encoded in
# each model name ("<nlayers>L_<nunits>"). The same names are used later for the
# generated SOFIE sessions and in the histogram titles, so training three
# identical networks would make the comparison plots mislabelled.

model1 = TrainModel(CreateModel(4, 50), x_train, y_train, 'Higgs_Model_4L_50')
model2 = TrainModel(CreateModel(4, 200), x_train, y_train, 'Higgs_Model_4L_200')
model3 = TrainModel(CreateModel(2, 500), x_train, y_train, 'Higgs_Model_2L_500')
89
90#evaluate with SOFIE the 3 trained models
91
92
def GenerateModelCode(modelFile, generatedHeaderFile):
    """Parse a saved Keras model with SOFIE and append its code to a header.

    The inference code is generated with the kRootBinaryWeightFile option and
    written to `generatedHeaderFile` with the append flag set, so several
    models can share one header. Returns `generatedHeaderFile`.
    """
    sofie = ROOT.TMVA.Experimental.SOFIE
    parsedModel = sofie.PyKeras.Parse(modelFile)

    print("Generating inference code for the Keras model from ",modelFile,"in the header ", generatedHeaderFile)
    # generate inference code that reads its weights from a ROOT binary file
    parsedModel.Generate(sofie.Options.kRootBinaryWeightFile)
    # second argument True: append to the header instead of overwriting it
    parsedModel.OutputGenerated(generatedHeaderFile, True)
    return generatedHeaderFile
103
104
generatedHeaderFile = "Higgs_Model.hxx"
# The generated code of every model is appended to the same header, so files
# left over from a previous run must be removed first. The header and the weight
# file are checked independently: a missing weight file must not abort the
# script, and a stale weight file must not survive when only the header is gone.
import os
weightFile = "Higgs_Model.root"
for staleFile in (generatedHeaderFile, weightFile):
    if os.path.exists(staleFile):
        print("removing existing files", staleFile)
        os.remove(staleFile)
113
# append the inference code of each trained model to the common header
for kerasFile in (model1, model2, model3):
    GenerateModelCode(kerasFile, generatedHeaderFile)

# compile the generated code by including the header in the ROOT interpreter
ROOT.gInterpreter.Declare('#include "{}"'.format(generatedHeaderFile))
121
122
# run the inference on the test data:
# one generated Session per model, all constructed from the same weight file
session1, session2, session3 = (
    ROOT.TMVA_SOFIE_Higgs_Model_4L_50.Session("Higgs_Model.root"),
    ROOT.TMVA_SOFIE_Higgs_Model_4L_200.Session("Higgs_Model.root"),
    ROOT.TMVA_SOFIE_Higgs_Model_2L_500.Session("Higgs_Model.root"),
)

# model response for signal events (100 bins between 0 and 1)
hs1, hs2, hs3 = (
    ROOT.TH1D("hs1","Signal result 4L 50",100,0,1),
    ROOT.TH1D("hs2","Signal result 4L 200",100,0,1),
    ROOT.TH1D("hs3","Signal result 2L 500",100,0,1),
)

# model response for background events, same binning
hb1, hb2, hb3 = (
    ROOT.TH1D("hb1","Background result 4L 50",100,0,1),
    ROOT.TH1D("hb2","Background result 4L 200",100,0,1),
    ROOT.TH1D("hb3","Background result 2L 500",100,0,1),
)
135
def EvalModel(session, x) :
    """Run `session.infer` on the input `x` and return the first output value."""
    return session.infer(x)[0]
139
# evaluate the three models on every test event and fill the signal or
# background histograms according to the true label of the event
for eventFeatures, eventLabel in zip(x_test, y_test):
    scores = [EvalModel(session, eventFeatures) for session in (session1, session2, session3)]
    histos = (hs1, hs2, hs3) if eventLabel == 1 else (hb1, hb2, hb3)
    for histo, score in zip(histos, scores):
        histo.Fill(score)
152
def PlotHistos(hs,hb):
    """Draw `hs` in red and overlay `hb` in blue using the "same" option."""
    for histo, color in ((hs, ROOT.kRed), (hb, ROOT.kBlue)):
        histo.SetLineColor(color)
    hs.Draw()
    hb.Draw("same")
158
# one canvas divided into three pads (1 column x 3 rows), one pad per model
c1 = ROOT.TCanvas()
c1.Divide(1,3)
for padIndex, (hSig, hBkg) in enumerate(((hs1, hb1), (hs2, hb2), (hs3, hb3)), start=1):
    c1.cd(padIndex)
    PlotHistos(hSig, hBkg)
c1.Draw()
168
169## draw also ROC curves
170
def GetContent(h) :
    """Return the bin centers and bin contents of `h` as two std::vector<float>.

    Both vectors have `h.GetNbinsX()` entries; vector slot k holds the value
    of histogram bin k+1.
    """
    n = h.GetNbinsX()
    centers = ROOT.std.vector['float'](n)
    contents = ROOT.std.vector['float'](n)
    for ibin in range(1, n + 1):
        centers[ibin - 1] = h.GetBinCenter(ibin)
        contents[ibin - 1] = h.GetBinContent(ibin)
    return centers, contents
179
def MakeROCCurve(hs, hb) :
    """Build a TMVA ROC curve from a signal and a background histogram.

    Bin centers are passed as the classifier values and bin contents as the
    weights. The ROC integral is printed, the curve is named after `hs`, and
    both the ROCCurve object and its curve are returned.
    NOTE(review): presumably the ROCCurve is returned so it stays alive while
    the curve is drawn -- confirm ownership of the returned graph.
    """
    signalValues, signalWeights = GetContent(hs)
    backgroundValues, backgroundWeights = GetContent(hb)
    roc = ROOT.TMVA.ROCCurve(signalValues, backgroundValues, signalWeights, backgroundWeights)
    histName = hs.GetName()
    print("ROC integral for ",histName, roc.GetROCIntegral())
    curve = roc.GetROCCurve()
    curve.SetName(histName)
    return roc,curve
188
c2 = ROOT.TCanvas()

# build and draw the ROC curve of each model in turn; the first curve is drawn
# with "AC" and the following ones with "C" on the same canvas
rocResults = []
for hSig, hBkg, lineColor, drawOption in (
        (hs1, hb1, ROOT.kRed, "AC"),
        (hs2, hb2, ROOT.kBlue, "C"),
        (hs3, hb3, ROOT.kGreen, "C")):
    roc, curve = MakeROCCurve(hSig, hBkg)
    curve.SetLineColor(lineColor)
    curve.Draw(drawOption)
    rocResults.append((roc, curve))

# keep module-level references to every ROC object and its curve
(r1, curve1), (r2, curve2), (r3, curve3) = rocResults

c2.Draw()
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...