Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVA_SOFIE_Models.py
Go to the documentation of this file.
### \file
### \ingroup tutorial_ml
### \notebook -nodraw
### Example of inference with SOFIE using a set of models trained with Keras.
### This tutorial shows how to store several models in a single header file and
### the weights in a ROOT binary file.
### The input data are read with RDataFrame and the models are then evaluated
### event by event on the test sample.
### First, generate the input model by running `TMVA_Higgs_Classification.C`.
###
### This tutorial parses the input model and runs the inference using ROOT's JITing capability.
###
### \macro_code
### \macro_output
### \author Lorenzo Moneta
15
16import os
17
18import numpy as np
19import ROOT
20from sklearn.model_selection import train_test_split
21from tensorflow.keras.layers import Dense
22from tensorflow.keras.models import Sequential
23from tensorflow.keras.optimizers import Adam
24
## generate and train Keras models with different architectures
26
27
def CreateModel(nlayers = 4, nunits = 64):
    """Build and compile a fully connected Keras model for binary classification.

    The network takes 7 input variables, stacks hidden Dense layers with ReLU
    activation and ends with a single sigmoid output unit.

    Parameters:
        nlayers: total number of hidden layers. The first hidden layer is always
            created, so values below 1 still give one hidden layer.
        nunits: number of units in every hidden layer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam optimizer
        with learning rate 0.001, accuracy as weighted metric).
    """
    model = Sequential()
    # the first hidden layer also declares the number of input variables
    model.add(Dense(nunits, activation='relu',input_dim=7))
    # remaining hidden layers: the first one has already been added above
    for _ in range(1,nlayers) :
        model.add(Dense(nunits, activation='relu'))

    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.001), weighted_metrics = ['accuracy'])
    return model
38
def PrepareData() :
    """Load the signal and background samples and split them for training and testing.

    Reads the trees "sig_tree" and "bkg_tree" from the Higgs_data.root file in the
    ROOT tutorial directory, stacks the 7 input variables of each tree in a single
    array (nevents x nvars), labels signal events with 1 and background events
    with 0 and splits the merged sample in two halves.

    Returns:
        The tuple (x_train, y_train, x_test, y_test). Note that this order differs
        from the order returned by train_test_split.
    """
    # input variables, in the order in which they are stacked as model inputs
    columns = ['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']

    #get the input data
    inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root"

    def ReadTree(treeName) :
        # read the columns as numpy arrays and stack them in a single array (nevents x nvars);
        # indexing by name keeps the column order identical for signal and background
        data = ROOT.RDataFrame(treeName, inputFile).AsNumpy(columns=columns)
        return np.column_stack([data[name] for name in columns])

    xsig = ReadTree("sig_tree")
    data_sig_size = xsig.shape[0]
    print("size of data", data_sig_size)

    # load the background data in the same way
    xbkg = ReadTree("bkg_tree")
    data_bkg_size = xbkg.shape[0]

    # targets: 1 for signal events, 0 for background events
    ysig = np.ones(data_sig_size)
    ybkg = np.zeros(data_bkg_size)
    inputs_data = np.concatenate((xsig,xbkg),axis=0)
    inputs_targets = np.concatenate((ysig,ybkg),axis=0)

    #split data in training and test data (fixed random_state for a reproducible split)
    x_train, x_test, y_train, y_test = train_test_split(
        inputs_data, inputs_targets, test_size=0.50, random_state=1234)

    return x_train, y_train, x_test, y_test
69
def TrainModel(model, x, y, name, epochs = 5, batch_size = 50) :
    """Train a model and save it to a file.

    Parameters:
        model: object providing fit(x, y, epochs=..., batch_size=...) and save(path),
            here a compiled Keras model.
        x: training inputs.
        y: training targets.
        name: base name of the output file; '.keras' is appended to it.
        epochs: number of training epochs (default 5).
        batch_size: training batch size (default 50).

    Returns:
        The name of the file in which the trained model has been saved.
    """
    model.fit(x,y,epochs=epochs,batch_size=batch_size)
    modelFile = name + '.keras'
    model.save(modelFile)
    return modelFile
75
### run the models

x_train, y_train, x_test, y_test = PrepareData()

## create models and train them
# the (number of layers, number of units) passed to CreateModel match the
# architecture encoded in each model name, e.g. 4L_50 -> 4 layers of 50 units

model1 = TrainModel(CreateModel(4,50),x_train, y_train, 'Higgs_Model_4L_50')
model2 = TrainModel(CreateModel(4,200),x_train, y_train, 'Higgs_Model_4L_200')
model3 = TrainModel(CreateModel(2,500),x_train, y_train, 'Higgs_Model_2L_500')
85
# evaluate the 3 trained models with SOFIE
87
88
def GenerateModelCode(modelFile, generatedHeaderFile):
    """Generate the SOFIE inference code for a saved Keras model.

    The model is parsed from `modelFile`, the inference code is generated with
    the weights stored in a ROOT binary file and the code is appended to
    `generatedHeaderFile`, so that several models can share the same header.

    Parameters:
        modelFile: name of the file containing the trained Keras model.
        generatedHeaderFile: name of the header file to which the code is appended.

    Returns:
        The name of the header file (generatedHeaderFile).
    """
    # parse the Keras model into a SOFIE model
    model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile)

    print("Generating inference code for the Keras model from ",modelFile,"in the header ", generatedHeaderFile)
    #Generating inference code using a ROOT binary file
    model.Generate(ROOT.TMVA.Experimental.SOFIE.Options.kRootBinaryWeightFile)
    # add option to append to the same file the generated headers (pass True for append flag)
    model.OutputGenerated(generatedHeaderFile, True)
    return generatedHeaderFile
99
100
generatedHeaderFile = "Higgs_Model.hxx"
weightFile = "Higgs_Model.root"

# need to remove an existing header file since the generated code is appended
# to it; any weight file left from a previous run is removed as well
for staleFile in (generatedHeaderFile, weightFile):
    if os.path.exists(staleFile):
        print("removing existing file", staleFile)
        os.remove(staleFile)

# append the inference code of each trained model to the same header
for modelFile in (model1, model2, model3):
    GenerateModelCode(modelFile, generatedHeaderFile)

#compile the generated code

ROOT.gInterpreter.Declare('#include "' + generatedHeaderFile + '"')
119
120
#run the inference on the test data
# one session per generated model, each initialized from the common weight file
session1 = ROOT.TMVA_SOFIE_Higgs_Model_4L_50.Session(weightFile)
session2 = ROOT.TMVA_SOFIE_Higgs_Model_4L_200.Session(weightFile)
session3 = ROOT.TMVA_SOFIE_Higgs_Model_2L_500.Session(weightFile)

# histograms of the model output for the signal events
hs1 = ROOT.TH1D("hs1","Signal result 4L 50",100,0,1)
hs2 = ROOT.TH1D("hs2","Signal result 4L 200",100,0,1)
hs3 = ROOT.TH1D("hs3","Signal result 2L 500",100,0,1)

# histograms of the model output for the background events
hb1 = ROOT.TH1D("hb1","Background result 4L 50",100,0,1)
hb2 = ROOT.TH1D("hb2","Background result 4L 200",100,0,1)
hb3 = ROOT.TH1D("hb3","Background result 2L 500",100,0,1)
133
def EvalModel(session, x) :
    """Evaluate a model on a single input and return the first output value.

    Parameters:
        session: object providing infer(x), here a SOFIE Session.
        x: input passed unchanged to session.infer.

    Returns:
        The element at index 0 of the result returned by session.infer(x).
    """
    return session.infer(x)[0]
137
# evaluate the three models on every test event and fill the signal or the
# background histograms according to the true label of the event
for features, label in zip(x_test, y_test):
    result1 = EvalModel(session1, features)
    result2 = EvalModel(session2, features)
    result3 = EvalModel(session3, features)
    if label == 1:
        hs1.Fill(result1)
        hs2.Fill(result2)
        hs3.Fill(result3)
    else:
        hb1.Fill(result1)
        hb2.Fill(result2)
        hb3.Fill(result3)
150
def PlotHistos(hs,hb):
    """Draw a signal and a background histogram overlaid in the current pad.

    Parameters:
        hs: signal histogram, drawn first with line color "kRed".
        hb: background histogram, drawn with line color "kBlue" and the "same" option.
    """
    hs.SetLineColor("kRed")
    hb.SetLineColor("kBlue")
    hs.Draw()
    # "same" overlays the background on the signal already drawn
    hb.Draw("same")
156
# one canvas with three pads (1 column x 3 rows), one pad per model
c1 = ROOT.TCanvas()
c1.Divide(1,3)
for padNumber, (sigHisto, bkgHisto) in enumerate(((hs1,hb1), (hs2,hb2), (hs3,hb3)), start=1):
    c1.cd(padNumber)
    PlotHistos(sigHisto, bkgHisto)
c1.Draw()
166
## also draw the ROC curves
168
def GetContent(h) :
    """Extract the bin centers and the bin contents of a histogram.

    Parameters:
        h: histogram providing GetNbinsX, GetBinCenter and GetBinContent.

    Returns:
        The tuple (x, w) of std::vector<float>, each of size GetNbinsX(), with the
        centers and the contents of the bins 1 to GetNbinsX().
    """
    n = h.GetNbinsX()
    x = ROOT.std.vector['float'](n)
    w = ROOT.std.vector['float'](n)
    for i in range(n):
        # the histogram bins are addressed starting from 1, the vectors from 0
        x[i] = h.GetBinCenter(i+1)
        w[i] = h.GetBinContent(i+1)
    return x,w
177
def MakeROCCurve(hs, hb) :
    """Build the ROC curve from a signal and a background output histogram.

    The bin centers are used as model output values and the bin contents as
    their weights. The ROC integral is printed together with the name of the
    signal histogram.

    Parameters:
        hs: histogram of the model output for signal events.
        hb: histogram of the model output for background events.

    Returns:
        The tuple (roc, curve) with the ROOT.TMVA.ROCCurve object and the curve
        obtained from its GetROCCurve method.
    """
    xs,ws = GetContent(hs)
    xb,wb = GetContent(hb)
    roc = ROOT.TMVA.ROCCurve(xs,xb,ws,wb)
    print("ROC integral for ",hs.GetName(), roc.GetROCIntegral())
    curve = roc.GetROCCurve()
    # NOTE(review): roc is returned with the curve, presumably to keep it alive
    # while the curve is drawn - confirm
    return roc,curve
186
c2 = ROOT.TCanvas()

# the first curve is drawn with the axis ("A"), the others are overlaid on it
r1,curve1 = MakeROCCurve(hs1,hb1)
curve1.Draw("AC")

r2,curve2 = MakeROCCurve(hs2,hb2)
curve2.SetLineColor("kBlue")
curve2.Draw("C")

r3,curve3 = MakeROCCurve(hs3,hb3)
curve3.SetLineColor("kGreen")
curve3.Draw("C")

c2.Draw()
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...