39void MakeTimeData(
int n,
int ntime,
int ndim )
45 std::vector<TH1 *>
v1(ntime);
46 std::vector<TH1 *>
v2(ntime);
48 for (
int i = 0; i < ntime; ++i) {
53 auto f1 =
new TF1(
"f1",
"gaus");
54 auto f2 =
new TF1(
"f2",
"gaus");
56 TFile f(fname,
"RECREATE");
57 TTree sgn(
"sgn",
"sgn");
58 TTree bkg(
"bkg",
"bkg");
60 std::vector<std::vector<float>> x1(ntime);
61 std::vector<std::vector<float>> x2(ntime);
63 for (
int i = 0; i < ntime; ++i) {
64 x1[i] = std::vector<float>(ndim);
65 x2[i] = std::vector<float>(ndim);
68 for (
auto i = 0; i < ntime; i++) {
69 bkg.Branch(
Form(
"vars_time%d", i),
"std::vector<float>", &x1[i]);
70 sgn.Branch(
Form(
"vars_time%d", i),
"std::vector<float>", &x2[i]);
77 std::vector<double> mean1(ntime);
78 std::vector<double> mean2(ntime);
79 std::vector<double> sigma1(ntime);
80 std::vector<double> sigma2(ntime);
81 for (
int j = 0; j < ntime; ++j) {
82 mean1[j] = 5. + 0.2 *
sin(
TMath::Pi() * j /
double(ntime));
83 mean2[j] = 5. + 0.2 *
cos(
TMath::Pi() * j /
double(ntime));
84 sigma1[j] = 4 + 0.3 *
sin(
TMath::Pi() * j /
double(ntime));
85 sigma2[j] = 4 + 0.3 *
cos(
TMath::Pi() * j /
double(ntime));
87 for (
int i = 0; i <
n; ++i) {
90 std::cout <<
"Generating event ... " << i << std::endl;
92 for (
int j = 0; j < ntime; ++j) {
98 f1->SetParameters(1, mean1[j], sigma1[j]);
99 f2->SetParameters(1, mean2[j], sigma2[j]);
101 h1->FillRandom(
"f1", 1000);
102 h2->FillRandom(
"f2", 1000);
104 for (
int k = 0; k < ndim; ++k) {
106 x1[j][k] =
h1->GetBinContent(k + 1) +
gRandom->Gaus(0, 10);
107 x2[j][k] = h2->GetBinContent(k + 1) +
gRandom->Gaus(0, 10);
116 c1->Divide(ntime, 2);
117 for (
int j = 0; j < ntime; ++j) {
121 for (
int j = 0; j < ntime; ++j) {
122 c1->cd(ntime + j + 1);
144void TMVA_RNN_Classification(
int nevts = 2000,
int use_type = 1)
147 const int ninput = 30;
148 const int ntime = 10;
149 const int batchSize = 100;
150 const int maxepochs = 20;
152 int nTotEvts = nevts;
154 bool useKeras =
true;
157 bool useTMVA_RNN =
true;
158 bool useTMVA_DNN =
true;
159 bool useTMVA_BDT =
false;
161 std::vector<std::string> rnn_types = {
"RNN",
"LSTM",
"GRU"};
162 std::vector<bool> use_rnn_type = {1, 1, 1};
163 if (use_type >=0 && use_type < 3) {
164 use_rnn_type = {0,0,0};
165 use_rnn_type[use_type] = 1;
169#ifndef R__HAS_TMVAGPU
171#ifndef R__HAS_TMVACPU
172 Warning(
"TMVA_RNN_Classification",
"TMVA is not build with GPU or CPU multi-thread support. Cannot use TMVA Deep Learning for RNN");
178 TString archString = (useGPU) ?
"GPU" :
"CPU";
180 bool writeOutputFile =
true;
184 const char *rnn_type =
"RNN";
195 gSystem->Setenv(
"OMP_NUM_THREADS",
"1");
198 if (num_threads >= 0) {
207 TString inputFileName =
"time_data_t10_d30.root";
209 bool fileExist = !
gSystem->AccessPathName(inputFileName);
213 MakeTimeData(nTotEvts,ntime, ninput);
219 Error(
"TMVA_RNN_Classification",
"Error opening input file %s - exit", inputFileName.
Data());
224 std::cout <<
"--- RNNClassification : Using input file: " << inputFile->GetName() << std::endl;
228 TFile *outputFile =
nullptr;
229 if (writeOutputFile) outputFile =
TFile::Open(outfileName,
"RECREATE");
254 "!V:!Silent:Color:DrawProgressBar:Transformations=None:!Correlations:"
255 "AnalysisType=Classification:ModelPersistence");
258 TTree *signalTree = (
TTree *)inputFile->Get(
"sgn");
259 TTree *background = (
TTree *)inputFile->Get(
"bkg");
261 const int nvar = ninput * ntime;
264 for (
auto i = 0; i < ntime; i++) {
274 std::cout <<
"number of variables is " << vars.size() << std::endl;
276 std::cout <<
v <<
",";
277 std::cout << std::endl;
279 int nTrainSig = 0.8 * nTotEvts;
280 int nTrainBkg = 0.8 * nTotEvts;
283 TString prepareOptions =
TString::Format(
"nTrain_Signal=%d:nTrain_Background=%d:SplitMode=Random:SplitSeed=100:NormMode=NumEvents:!V:!CalcCorrelations", nTrainSig, nTrainBkg);
291 std::cout <<
"prepared DATA LOADER " << std::endl;
302 for (
int i = 0; i < 3; ++i) {
304 if (!use_rnn_type[i])
307 const char *rnn_type = rnn_types[i].c_str();
321 TString layoutString =
TString(
"Layout=") + rnnLayout +
TString(
",RESHAPE|FLAT,DENSE|64|TANH,LINEAR");
325 "ConvergenceSteps=5,BatchSize=%d,TestRepetitions=1,"
326 "WeightDecay=1e-2,Regularization=None,MaxEpochs=%d,"
327 "Optimizer=ADAM,DropConfig=0.0+0.+0.+0.",
328 batchSize,maxepochs);
330 TString trainingStrategyString(
"TrainingStrategy=");
331 trainingStrategyString += trainingString1;
334 TString rnnOptions(
"!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=None:"
335 "WeightInitialization=XAVIERUNIFORM:ValidationSize=0.2:RandomSeed=1234");
337 rnnOptions.Append(
":");
338 rnnOptions.Append(inputLayoutString);
339 rnnOptions.Append(
":");
340 rnnOptions.Append(layoutString);
341 rnnOptions.Append(
":");
342 rnnOptions.Append(trainingStrategyString);
343 rnnOptions.Append(
":");
361 TString layoutString(
"Layout=DENSE|64|TANH,DENSE|TANH|64,DENSE|TANH|64,LINEAR");
// Comma-separated training options for the dense network; the result is
// appended to "TrainingStrategy=" right after this statement.
// Adjacent string literals are concatenated by the compiler, so every piece
// except the last must end with a comma. The separator after "MaxEpochs=20"
// was missing, which fused that option with "DropConfig=..." into the single
// token "MaxEpochs=20DropConfig=0.0+0.+0.+0.".
363 TString trainingString1(
"LearningRate=1e-3,Momentum=0.0,Repetitions=1,"
364 "ConvergenceSteps=10,BatchSize=256,TestRepetitions=1,"
365 "WeightDecay=1e-4,Regularization=None,MaxEpochs=20,"
366 "DropConfig=0.0+0.+0.+0.,Optimizer=ADAM");
367 TString trainingStrategyString(
"TrainingStrategy=");
368 trainingStrategyString += trainingString1;
371 TString dnnOptions(
"!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=None:"
372 "WeightInitialization=XAVIER:RandomSeed=0");
374 dnnOptions.Append(
":");
375 dnnOptions.Append(inputLayoutString);
376 dnnOptions.Append(
":");
377 dnnOptions.Append(layoutString);
378 dnnOptions.Append(
":");
379 dnnOptions.Append(trainingStrategyString);
380 dnnOptions.Append(
":");
381 dnnOptions.Append(archString);
396 for (
int i = 0; i < 3; i++) {
398 if (use_rnn_type[i]) {
403 Info(
"TMVA_RNN_Classification",
"Building recurrent keras model using a %s layer", rnn_types[i].c_str());
407 m.AddLine(
"import tensorflow");
408 m.AddLine(
"from tensorflow.keras.models import Sequential");
409 m.AddLine(
"from tensorflow.keras.optimizers import Adam");
410 m.AddLine(
"from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, SimpleRNN, GRU, LSTM, Reshape, "
411 "BatchNormalization");
413 m.AddLine(
"model = Sequential() ");
414 m.AddLine(
"model.add(Reshape((10, 30), input_shape = (10*30, )))");
416 if (rnn_types[i] ==
"LSTM")
417 m.AddLine(
"model.add(LSTM(units=10, return_sequences=True) )");
418 else if (rnn_types[i] ==
"GRU")
419 m.AddLine(
"model.add(GRU(units=10, return_sequences=True) )");
421 m.AddLine(
"model.add(SimpleRNN(units=10, return_sequences=True) )");
424 m.AddLine(
"model.add(Flatten())");
425 m.AddLine(
"model.add(Dense(64, activation = 'tanh')) ");
426 m.AddLine(
"model.add(Dense(2, activation = 'sigmoid')) ");
428 "model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.001), weighted_metrics = ['accuracy'])");
430 m.AddLine(
"model.save(modelName)");
431 m.AddLine(
"model.summary()");
433 m.SaveSource(
"make_rnn_model.py");
437 gSystem->Exec(python_exe +
" make_rnn_model.py");
439 if (
gSystem->AccessPathName(modelName)) {
440 Warning(
"TMVA_RNN_Classification",
"Error creating Keras recurrent model file - Skip using Keras");
444 Info(
"TMVA_RNN_Classification",
"Booking Keras %s model", rnn_types[i].c_str());
448 "FilenameTrainedModel=%s:NumEpochs=%d:BatchSize=%d",
449 modelName.
Data(), trainedModelName.
Data(), maxepochs, batchSize));
456 if (!useKeras || !useTMVA_BDT)
466 "!H:!V:NTrees=100:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:"
467 "BaggedSampleFraction=0.5:nCuts=20:"
489 if (outputFile) outputFile->
Close();
Error("WriteTObject","The current directory (%s) is not associated with a file. The object (%s) has not been written.", GetName(), objname)
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
A specialized string object used for TTree selections.
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
void Close(Option_t *option="") override
Close a file.
1-D histogram with a double per channel (see TH1 documentation)
static Config & Instance()
static function: returns TMVA instance
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
DataSetInfo & GetDataSetInfo()
std::vector< TString > GetListOfVariables() const
returns list of variables
This is the main MVA steering class.
void TrainAllMethods()
Iterates through all booked methods and calls training.
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponding ...
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
MethodBase * BookMethod(DataLoader *loader, MethodName theMethodName, TString methodTitle, TString theOption="")
Books an MVA classifier or regression method.
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
static void PyInitialize()
Initialize Python interpreter.
Class supporting a collection of lines with C++ code.
const char * Data() const
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
A TTree represents a columnar dataset.
RVec< PromoteType< T > > cos(const RVec< T > &v)
RVec< PromoteType< T > > sin(const RVec< T > &v)
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.