"""This is an example of using a RNN in TMVA.

We do classification using a toy time-dependent data set that is generated
when running this example macro.
"""
import ROOT

# Number of threads to request from ROOT's implicit multi-threading pool.
num_threads = 4

# Enable ROOT implicit MT when the build supports it; otherwise run serially.
if "imt" in ROOT.gROOT.GetConfigFeatures():
    # BUG FIX: num_threads was unused and IMT was never enabled — the branch
    # only exported OMP_NUM_THREADS.
    ROOT.EnableImplicitMT(num_threads)
    # Pin OpenMP to one thread so external numerical libraries do not
    # oversubscribe cores on top of ROOT's own thread pool.
    ROOT.gSystem.Setenv("OMP_NUM_THREADS", "1")
else:
    print("Running in serial mode since ROOT does not support MT")

# Short aliases used throughout the script.
TMVA = ROOT.TMVA
TFile = ROOT.TFile

import os
def MakeTimeData(n, ntime, ndim):
    """Generate a toy time-dependent dataset and write it to a ROOT file.

    Creates two TTrees ("sgn" and "bkg"), each with `ntime` branches of
    ``std::vector<float>`` of size `ndim`.  For each event every time slice is
    a Gaussian histogram with time-dependent mean/sigma (sine modulation for
    signal, cosine for background) plus per-bin Gaussian noise.

    Parameters:
        n     : number of events to generate per class
        ntime : number of time steps (one branch per step)
        ndim  : number of values (histogram bins) per time step
    """
    fname = "time_data_t" + str(ntime) + "_d" + str(ndim) + ".root"

    # One scratch histogram per time slice for signal (v1) and background (v2).
    v1 = []
    v2 = []
    for i in range(ntime):
        v1.append(ROOT.TH1D("h1_" + str(i), "h1", ndim, 0, 10))
        v2.append(ROOT.TH1D("h2_" + str(i), "h2", ndim, 0, 10))

    f1 = ROOT.TF1("f1", "gaus")
    f2 = ROOT.TF1("f2", "gaus")

    sgn = ROOT.TTree("sgn", "sgn")
    bkg = ROOT.TTree("bkg", "bkg")
    f = TFile(fname, "RECREATE")

    # Event buffers: one vector<float> of size ndim per time slice.
    x1 = []
    x2 = []
    for i in range(ntime):
        x1.append(ROOT.std.vector["float"](ndim))
        x2.append(ROOT.std.vector["float"](ndim))

    for i in range(ntime):
        bkg.Branch("vars_time" + str(i), "std::vector<float>", x1[i])
        sgn.Branch("vars_time" + str(i), "std::vector<float>", x2[i])

    sgn.SetDirectory(f)
    bkg.SetDirectory(f)
    ROOT.gRandom.SetSeed(0)

    # Time-dependent Gaussian parameters: signal modulated by sine,
    # background by cosine.
    mean1 = ROOT.std.vector["double"](ntime)
    mean2 = ROOT.std.vector["double"](ntime)
    sigma1 = ROOT.std.vector["double"](ntime)
    sigma2 = ROOT.std.vector["double"](ntime)
    for j in range(ntime):
        mean1[j] = 5.0 + 0.2 * ROOT.TMath.Sin(ROOT.TMath.Pi() * j / float(ntime))
        mean2[j] = 5.0 + 0.2 * ROOT.TMath.Cos(ROOT.TMath.Pi() * j / float(ntime))
        sigma1[j] = 4 + 0.3 * ROOT.TMath.Sin(ROOT.TMath.Pi() * j / float(ntime))
        sigma2[j] = 4 + 0.3 * ROOT.TMath.Cos(ROOT.TMath.Pi() * j / float(ntime))

    for i in range(n):
        if i % 1000 == 0:
            # BUG FIX: was print("...%d", i), which printed the raw tuple
            # instead of formatting the event number.
            print("Generating event ... %d" % i)

        for j in range(ntime):
            h1 = v1[j]
            h2 = v2[j]
            h1.Reset()
            h2.Reset()

            f1.SetParameters(1, mean1[j], sigma1[j])
            f2.SetParameters(1, mean2[j], sigma2[j])

            h1.FillRandom(f1, 1000)
            h2.FillRandom(f2, 1000)

            # BUG FIX: was range(ntime); the vectors and histograms have ndim
            # entries, so bins ntime..ndim-1 were never filled when ndim > ntime.
            for k in range(ndim):
                x1[j][k] = h1.GetBinContent(k + 1) + ROOT.gRandom.Gaus(0, 10)
                x2[j][k] = h2.GetBinContent(k + 1) + ROOT.gRandom.Gaus(0, 10)

        sgn.Fill()
        bkg.Fill()

        # Single-event mode: show the generated time slices for inspection.
        if n == 1:
            c1 = ROOT.TCanvas()
            c1.Divide(ntime, 2)
            for j in range(ntime):
                c1.cd(j + 1)
                # NOTE(review): loop bodies were empty in the damaged source;
                # drawing the per-slice histograms is the evident intent.
                v1[j].Draw()
            for j in range(ntime):
                c1.cd(ntime + j + 1)
                v2[j].Draw()
            ROOT.gPad.Update()

    # Persist the trees only for a real production run.
    if n > 1:
        sgn.Write()
        bkg.Write()
        sgn.Print()
        bkg.Print()
        f.Close()
# ---------------------------------------------------------------------------
# Global configuration of the example
# ---------------------------------------------------------------------------

use_type = 1      # recurrent layer choice: 0=RNN, 1=LSTM, 2=GRU (other: all three)
ninput = 30       # input variables per time step
ntime = 10        # number of time steps
batchSize = 100
maxepochs = 10
nTotEvts = 2000   # total events generated per class

useKeras = False
useTMVA_RNN = True
useTMVA_DNN = True
useTMVA_BDT = False

# Keras is only usable when ROOT was built with PyMVA support.
if ROOT.gSystem.GetFromPipe("root-config --has-tmva-pymva") == "yes":
    useKeras = True

if useKeras:
    try:
        # BUG FIX: the try body was `pass`, so the except branch could never
        # fire and a missing tensorflow went undetected.
        import tensorflow
    except:
        ROOT.Warning("TMVA_RNN_Classification", "Skip using Keras since tensorflow cannot be imported")
        useKeras = False

rnn_types = ["RNN", "LSTM", "GRU"]
use_rnn_type = [1, 1, 1]
# Restrict to a single recurrent type when use_type selects one explicitly.
if 0 <= use_type < 3:
    use_rnn_type = [0, 0, 0]
    use_rnn_type[use_type] = 1

useGPU = True
useGPU = "tmva-gpu" in ROOT.gROOT.GetConfigFeatures()
useTMVA_RNN = ("tmva-cpu" in ROOT.gROOT.GetConfigFeatures()) or useGPU
# BUG FIX: condition was inverted — the "not built with GPU/CPU support"
# warning was printed exactly when TMVA *was* usable.
if not useTMVA_RNN:
    ROOT.Warning(
        "TMVA_RNN_Classification",
        "TMVA is not build with GPU or CPU multi-thread support. Cannot use TMVA Deep Learning for RNN",
    )

archString = "GPU" if useGPU else "CPU"
writeOutputFile = True
rnn_type = "RNN"

if "tmva-pymva" in ROOT.gROOT.GetConfigFeatures():
    # BUG FIX: the `if` body was missing; PyMVA must be initialized before
    # any PyKeras method can be booked.
    TMVA.PyMethodBase.PyInitialize()
else:
    useKeras = False
inputFileName = "time_data_t10_d30.root"

# AccessPathName returns True when the path does NOT exist; regenerate the
# toy dataset in that case.
fileDoesNotExist = ROOT.gSystem.AccessPathName(inputFileName)
if fileDoesNotExist:
    MakeTimeData(nTotEvts, ntime, ninput)

# BUG FIX: the input file was never opened before being used below.
inputFile = TFile.Open(inputFileName)
if inputFile is None:
    # BUG FIX: was `raise ROOT.Error(...)` — ROOT.Error is a logging function
    # returning None (raising it fails) — and called .Data() on a Python str.
    raise RuntimeError("Error opening input file %s - exit" % inputFileName)

print("--- RNNClassification : Using input file: {}".format(inputFile.GetName()))
outfileName = "data_RNN_" + archString + ".root"
outputFile = None
if writeOutputFile:
    # BUG FIX: the output file was never actually created before being handed
    # to the Factory.
    outputFile = TFile.Open(outfileName, "RECREATE")

# Create the TMVA steering Factory.
# BUG FIX: the `factory = TMVA.Factory(` opener was missing; the keyword
# options below were dangling and the script could not run.
factory = TMVA.Factory(
    "TMVAClassification",
    outputFile,
    V=False,
    Silent=False,
    Color=True,
    DrawProgressBar=True,
    Transformations=None,
    Correlations=False,
    AnalysisType="Classification",
    ModelPersistence=True,
)
# BUG FIX: the DataLoader was never constructed; every call below used an
# undefined name.
dataloader = TMVA.DataLoader("dataset")

signalTree = inputFile.Get("sgn")
background = inputFile.Get("bkg")

# Total number of input variables seen by non-recurrent methods.
nvar = ninput * ntime

# One array of ninput variables per time step.
for i in range(ntime):
    dataloader.AddVariablesArray("vars_time" + str(i), ninput)

dataloader.AddSignalTree(signalTree, 1.0)
dataloader.AddBackgroundTree(background, 1.0)

datainfo = dataloader.GetDataSetInfo()
# Renamed from `vars` to avoid shadowing the Python builtin.
varlist = datainfo.GetListOfVariables()
print("number of variables is {}".format(varlist.size()))
for v in varlist:
    print(v)

# Use 80% of the events of each class for training.
nTrainSig = 0.8 * nTotEvts
nTrainBkg = 0.8 * nTotEvts

# No selection cuts applied to either class.
mycuts = ""
mycutb = ""

dataloader.PrepareTrainingAndTestTree(
    mycuts,
    mycutb,
    nTrain_Signal=nTrainSig,
    nTrain_Background=nTrainBkg,
    SplitMode="Random",
    SplitSeed=100,
    NormMode="NumEvents",
    V=False,
    CalcCorrelations=False,
)

print("prepared DATA LOADER ")
if useTMVA_RNN:
    # Book one TMVA deep-learning method for each enabled recurrent type.
    for i in range(3):
        if not use_rnn_type[i]:
            continue

        rnn_type = rnn_types[i]

        # Network layout: recurrent layer (10 units, ninput features, ntime
        # steps), flatten, dense tanh layer, linear output.
        rnnLayout = str(rnn_type) + "|10|" + str(ninput) + "|" + str(ntime) + "|0|1,RESHAPE|FLAT,DENSE|64|TANH,LINEAR"

        # Single-phase training strategy.
        trainingString1 = "LearningRate=1e-3,Momentum=0.0,Repetitions=1,ConvergenceSteps=5,BatchSize=" + str(batchSize)
        trainingString1 += ",TestRepetitions=1,WeightDecay=1e-2,Regularization=None,MaxEpochs=" + str(maxepochs)
        # BUG FIX: missing leading comma produced the fused (and unparseable)
        # option "MaxEpochs=10Optimizer=ADAM".
        trainingString1 += ",Optimizer=ADAM,DropConfig=0.0+0.+0.+0."

        rnnName = "TMVA_" + str(rnn_type)
        factory.BookMethod(
            dataloader,
            TMVA.Types.kDL,
            rnnName,
            H=False,
            V=True,
            ErrorStrategy="CROSSENTROPY",
            VarTransform=None,
            WeightInitialization="XAVIERUNIFORM",
            ValidationSize=0.2,
            RandomSeed=1234,
            InputLayout=str(ntime) + "|" + str(ninput),
            Layout=rnnLayout,
            TrainingStrategy=trainingString1,
            Architecture=archString
        )
if useTMVA_DNN:
    # Book a plain dense network as a non-recurrent baseline.
    trainingString1 = ROOT.TString(
        "LearningRate=1e-3,Momentum=0.0,Repetitions=1,"
        "ConvergenceSteps=10,BatchSize=256,TestRepetitions=1,"
        # BUG FIX: missing comma fused "MaxEpochs=20DropConfig=..." into one
        # invalid option token.
        "WeightDecay=1e-4,Regularization=None,MaxEpochs=20,"
        "DropConfig=0.0+0.+0.+0.,Optimizer=ADAM:"
    )
    trainingString1.Append(archString)

    dnnName = "TMVA_DNN"
    factory.BookMethod(
        dataloader,
        TMVA.Types.kDL,
        dnnName,
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform=None,
        WeightInitialization="XAVIER",
        RandomSeed=0,
        # Flat input: one row of ntime*ninput values.
        InputLayout="1|1|" + str(ntime * ninput),
        # NOTE(review): the middle layers were written "DENSE|TANH|64"; TMVA's
        # DL layout convention is DENSE|<width>|<activation>, so they are
        # normalized here — confirm against the TMVA MethodDL option reference.
        Layout="DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,LINEAR",
        TrainingStrategy=trainingString1
    )
if useKeras:
    # Build, save and book one Keras model per enabled recurrent type.
    for i in range(3):
        if use_rnn_type[i]:
            modelName = "model_" + rnn_types[i] + ".keras"
            trainedModelName = "trained_" + modelName

            print("Building recurrent keras model using a", rnn_types[i], "layer")

            from tensorflow.keras.layers import (
                GRU,
                LSTM,
                Dense,
                Flatten,
                Reshape,
                SimpleRNN,
            )
            from tensorflow.keras.models import Sequential
            from tensorflow.keras.optimizers import Adam

            # Input arrives flattened (ntime*ninput); reshape it back to
            # (time steps, features) for the recurrent layer.
            model = Sequential()
            model.add(Reshape((10, 30), input_shape=(10 * 30,)))
            if rnn_types[i] == "LSTM":
                model.add(LSTM(units=10, return_sequences=True))
            elif rnn_types[i] == "GRU":
                model.add(GRU(units=10, return_sequences=True))
            else:
                model.add(SimpleRNN(units=10, return_sequences=True))

            model.add(Flatten())
            model.add(Dense(64, activation="tanh"))
            # Two output units (signal / background), sigmoid activation.
            model.add(Dense(2, activation="sigmoid"))
            model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"])
            model.save(modelName)
            model.summary()
            print("saved recurrent model", modelName)

            if not os.path.exists(modelName):
                useKeras = False
                print("Error creating Keras recurrent model file - Skip using Keras")
            else:
                print("Booking Keras model ", rnn_types[i])
                factory.BookMethod(
                    dataloader,
                    TMVA.Types.kPyKeras,
                    "PyKeras_" + rnn_types[i],
                    H=True,
                    V=False,
                    VarTransform=None,
                    FilenameModel=modelName,
                    # Consistency fix: trainedModelName was computed above but
                    # the literal "trained_" + modelName was duplicated here.
                    FilenameTrainedModel=trainedModelName,
                    NumEpochs=maxepochs,
                    BatchSize=batchSize
                )
# Ensure at least a classical method is booked: fall back to a BDT whenever
# Keras is unavailable or no TMVA BDT was explicitly requested.
if not useKeras or not useTMVA_BDT:
    useTMVA_BDT = True

if useTMVA_BDT:
    # Gradient-boosted decision trees as a reference method.
    factory.BookMethod(
        dataloader,
        TMVA.Types.kBDT,
        "BDTG",
        H=True,
        V=False,
        NTrees=100,
        MinNodeSize="2.5%",
        BoostType="Grad",
        Shrinkage=0.10,
        UseBaggedBoost=True,
        BaggedSampleFraction=0.5,
        nCuts=20,
        MaxDepth=2,
    )
# Train, test and evaluate every booked method.
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()

# Overlay the ROC curves of all trained methods on one canvas.
c1 = factory.GetROCCurve(dataloader)
c1.Draw()

# Flush TMVA results to disk by closing the output file.
if outputFile:
    outputFile.Close()
# ---------------------------------------------------------------------------
# Reference notes (class-reference tooltip residue; kept as comments)
# ---------------------------------------------------------------------------
# TFile — a file, usually with extension .root, that stores data and code in
#   the form of serialized objects.
#   static TFile *Open(const char *name, Option_t *option="",
#       const char *ftitle="",
#       Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault,
#       Int_t netopt=0) — create / open a file.
# TMVA::Config::Instance() — static function: returns the TMVA instance.
# TMVA::Factory — the main MVA steering class.
# TMVA::PyMethodBase::PyInitialize() — initialize the Python interpreter.
# ROOT::EnableImplicitMT(UInt_t numthreads=0) — enable ROOT's implicit
#   multi-threading for all objects and methods that provide internal
#   parallelism.
# ROOT::GetThreadPoolSize() — returns the size of ROOT's thread pool.