Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodPyKeras.cxx
Go to the documentation of this file.
1// @(#)root/tmva/pymva $Id$
2// Author: Stefan Wunsch, 2016
3
4#include <Python.h>
6
7#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
8#include <numpy/arrayobject.h>
9
10#include "TMVA/Types.h"
11#include "TMVA/Config.h"
13#include "TMVA/Results.h"
16#include "TMVA/Tools.h"
17#include "TMVA/Timer.h"
18#include "TObjString.h"
19#include "TSystem.h"
20#include "Math/Util.h"
21
22using namespace TMVA;
23
24namespace TMVA {
25namespace Internal {
// Helper class wrapping a single PyGILState_STATE member.
// NOTE(review): the members that follow `public:` (listing lines 30-31) are
// absent from this extract; presumably a constructor/destructor pair that
// acquires and releases the Python GIL -- confirm against the original source.
26class PyGILRAII {
27 PyGILState_STATE m_GILState; // GIL state value stored by this object
28
29public:
32};
33} // namespace Internal
34} // namespace TMVA
35
36REGISTER_METHOD(PyKeras)
37
39
41 : PyMethodBase(jobName, Types::kPyKeras, methodTitle, dsi, theOption) {
42 fNumEpochs = 10;
43 fNumThreads = 0;
44 fBatchSize = 100;
45 fVerbose = 1;
46 fContinueTraining = false;
47 fSaveBestOnly = true;
49 fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
50 fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
51 fTensorBoard = ""; // empty string deactivates TensorBoard callback
52}
53
56 fNumEpochs = 10;
57 fNumThreads = 0;
58 fBatchSize = 100;
59 fVerbose = 1;
60 fContinueTraining = false;
61 fSaveBestOnly = true;
63 fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
64 fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
65 fTensorBoard = ""; // empty string deactivates TensorBoard callback
66}
67
70
77
78///////////////////////////////////////////////////////////////////////////////
79
81 DeclareOptionRef(fFilenameModel, "FilenameModel", "Filename of the initial Keras model");
82 DeclareOptionRef(fFilenameTrainedModel, "FilenameTrainedModel", "Filename of the trained output Keras model");
83 DeclareOptionRef(fBatchSize, "BatchSize", "Training batch size");
84 DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");
85 DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)");
86 DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth");
87 DeclareOptionRef(fUseTFKeras, "tf.keras", "Use tensorflow from Keras");
88 DeclareOptionRef(fUseTFKeras, "tfkeras", "Use tensorflow from Keras");
89 DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training");
90 DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");
91 DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");
92 DeclareOptionRef(fTriesEarlyStopping, "TriesEarlyStopping", "Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
93 DeclareOptionRef(fLearningRateSchedule, "LearningRateSchedule", "Set new learning rate during training at specific epochs, e.g., \"50,0.01;70,0.005\"");
94 DeclareOptionRef(fTensorBoard, "TensorBoard",
95 "Write a log during training to visualize and monitor the training performance with TensorBoard");
96
97 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
98 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
99 "Specify as 100 to use exactly 100 events. (Default: 20%)");
100 DeclareOptionRef(fUserCodeName = "", "UserCode",
101 "Optional python code provided by the user to be executed before loading the Keras model");
102}
103
104////////////////////////////////////////////////////////////////////////////////
105/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
106/// 100 etc.
107/// - 20% and 0.2 selects 20% of the training set as validation data.
108/// - 100 selects 100 events as the validation data.
109///
110/// @return number of samples in validation set
111///
113{
115 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
116
117 // Parsing + Validation
118 // --------------------
119 if (fNumValidationString.EndsWith("%")) {
120 // Relative spec. format 20%
121 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
122
123 if (intValStr.IsFloat()) {
124 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
125 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
126 } else {
127 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
128 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
129 }
130 } else if (fNumValidationString.IsFloat()) {
131 Double_t valSizeAsDouble = fNumValidationString.Atof();
132
133 if (valSizeAsDouble < 1.0) {
134 // Relative spec. format 0.2
135 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
136 } else {
137 // Absolute spec format 100 or 100.0
139 }
140 } else {
141 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
142 << Endl;
143 }
144
145 // Value validation
146 // ----------------
147 if (nValidationSamples < 0) {
148 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
149 }
150
151 if (nValidationSamples == 0) {
152 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
153 }
154
156 Log() << kFATAL << "Validation size \"" << fNumValidationString
157 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
158 }
159
160 return nValidationSamples;
161}
162
163/// Function processing the options
164/// This is called only when creating the method before training, not when
165/// reading from an XML file. Called from MethodBase::ProcessSetup
166/// that is called from Factory::BookMethod
168
169 // Set default filename for trained model if option is not used
171 fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5";
172 }
173
174 InitKeras();
175
176 // Setup model, either the initial model from `fFilenameModel` or
177 // the trained model from `fFilenameTrainedModel`
178 if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
180}
181
183 // Initialize Keras first. This is done only here, once the class has
184 // all state variables set from options or read from an XML file
185 // Import Keras
186
187 if (fUseTFKeras)
188 Log() << kINFO << "Setting up tf.keras" << Endl;
189 else
190 Log() << kINFO << "Setting up keras with " << gSystem->Getenv("KERAS_BACKEND") << " backend" << Endl;
191
192 bool useTFBackend = kFALSE;
194 bool kerasIsPresent = kFALSE;
195
196 if (!fUseTFKeras) {
197 auto ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fLocalNS);
198 // need importing also in global namespace
199 if (ret != nullptr) ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fGlobalNS);
200 if (ret != nullptr)
202 if (kerasIsPresent) {
203 // check compatibility with tensorflow
204 if (GetKerasBackend() == kTensorFlow ) {
206
207 PyRunString("keras_major_version = int(keras.__version__.split('.')[0])");
208 PyRunString("keras_minor_version = int(keras.__version__.split('.')[1])");
213 Log() << kINFO << "Using Keras version " << kerasMajorVersion << "." << kerasMinorVersion << Endl;
214 // only version 2.3 is latest multi-backend version.
215 // version 2.4 is just tf.keras and should not be used in standalone and will not work in this workflow
216 // see https://github.com/keras-team/keras/releases/tag/2.4.0
217 // for example variable keras.backend.tensorflow_backend will not exist anymore in keras 2.4
219
220 }
221 } else {
222 // Keras is not found. Try to use tf.keras
223 Log() << kINFO << "Keras is not found. Trying using tf.keras" << Endl;
224 fUseTFKeras = 1;
225 }
226 }
227
228 // import TensorFlow (if requested or because it is the Keras backend)
229 if (fUseTFKeras || useTFBackend) {
230 auto ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fLocalNS);
231 if (ret != nullptr) ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fGlobalNS);
232 if (ret == nullptr) {
233 Log() << kFATAL << "Importing TensorFlow failed" << Endl;
234 }
235 // check tensorflow version
236 PyRunString("tf_major_version = int(tf.__version__.split('.')[0])");
237 PyObject *pyTfVersion = PyDict_GetItemString(fLocalNS, "tf_major_version");
239 Log() << kINFO << "Using TensorFlow version " << tfVersion << Endl;
240
241 if (tfVersion < 2) {
242 if (fUseTFKeras == 1) {
243 Log() << kWARNING << "Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" << Endl;
245 // case when Keras was not found
246 if (!kerasIsPresent) {
247 Log() << kFATAL << "Keras is not present and not a suitable TensorFlow version is found " << Endl;
248 return;
249 }
250 }
251 }
252 else {
253 // using version larger than 2.0 - can use tf.keras
254 if (!kerasIsCompatible) {
255 Log() << kWARNING << "The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" << Endl;
256 fUseTFKeras = 1;
257 }
258 }
259
260 // if keras 2.3 and tensorflow 2 are found. Use tf.keras or keras ?
261 // at the moment default is tf.keras=false to keep compatibility
262 // but this might change in future releases
263 if (fUseTFKeras) {
264 Log() << kINFO << "Use Keras version from TensorFlow : tf.keras" << Endl;
265 fKerasString = "tf.keras";
266 PyRunString("K = tf.keras.backend");
267 PyRun_String("K = tf.keras.backend", Py_single_input, fGlobalNS, fGlobalNS);
268 }
269 else {
270 Log() << kINFO << "Use TensorFlow as Keras backend" << Endl;
271 fKerasString = "keras";
272 PyRunString("from keras.backend import tensorflow_backend as K");
273 PyRun_String("from keras.backend import tensorflow_backend as K", Py_single_input, fGlobalNS, fGlobalNS);
274 }
275
276 // extra options for tensorflow
277 // use different naming in tf2 for ConfigProto and Session
278 TString configProto = (tfVersion >= 2) ? "tf.compat.v1.ConfigProto" : "tf.ConfigProto";
279 TString session = (tfVersion >= 2) ? "tf.compat.v1.Session" : "tf.Session";
280
281 // in case the number of threads is specified
283 if (num_threads > 0) {
284 Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl;
285
287 TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
289 } else
290 PyRunString(TString::Format("session_conf = %s()", configProto.Data()));
291
292 // applying GPU options such as allow_growth=True to avoid allocating all memory on GPU
293 // that prevents running later TMVA-GPU
294 // Also new Nvidia RTX cards (e.g. RTX 2070) require this option
295 if (!fGpuOptions.IsNull()) {
297 for (int item = 0; item < optlist->GetEntries(); ++item) {
298 Log() << kINFO << "Applying GPU option: gpu_options." << optlist->At(item)->GetName() << Endl;
299 PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName()));
300 }
301 }
302 PyRunString(TString::Format("sess = %s(config=session_conf)", session.Data()));
303
304 if (tfVersion < 2) {
305 PyRunString("K.set_session(sess)");
306 } else {
307 PyRunString("tf.compat.v1.keras.backend.set_session(sess)");
308 }
309 }
310 // case not using a Tensorflow backend
311 else {
312 fKerasString = "keras";
313 if (fNumThreads > 0)
314 Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend"
315 << Endl;
316 if (!fGpuOptions.IsNull()) {
317 Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions
318 << " when not using tensorflow as backend" << Endl;
319 }
320 }
321
322}
323
325 /*
326 * Load Keras model from file
327 */
328
329 Log() << kINFO << " Loading Keras Model " << Endl;
330
331 PyRunString("load_model_custom_objects=None");
332
333
334
335 if (!fUserCodeName.IsNull()) {
336 Log() << kINFO << " Executing user initialization code from " << fUserCodeName << Endl;
337
338
339 // run some python code provided by user for model initialization if needed
340 TString cmd = "exec(open('" + fUserCodeName + "').read())";
341 TString errmsg = "Error executing the provided user code";
343
344 PyRunString("print('custom objects for loading model : ',load_model_custom_objects)");
345 }
346
347 // Load initial model or already trained model
349 if (loadTrainedModel) {
351 }
352 else {
354 }
355
356 PyRunString("model = " + fKerasString + ".models.load_model('" + filenameLoadModel +
357 "', custom_objects=load_model_custom_objects)", "Failed to load Keras model from file: " + filenameLoadModel);
358
359 Log() << kINFO << "Loaded model from file: " << filenameLoadModel << Endl;
360
361
362 /*
363 * Init variables and weights
364 */
365
366 // Get variables, classes and target numbers
370 else Log() << kFATAL << "Selected analysis type is not implemented" << Endl;
371
372 // Mark the model as setup
373 fModelIsSetup = true;
374 fModelIsSetupForEval = false;
375}
376
377/// Set up the model for evaluation.
378/// Add here any needed optimizations, such as disabling eager execution.
// NOTE(review): the signature line and listing lines 397-399, 404-406 and 409
// are absent from this extract; check the original source before relying on
// this fragment.
380
381 InitKeras();
382
383 // Disable eager execution (per the original note, the model evaluates > 100 times faster).
384 // This needs to be done before loading the model.
385#ifndef R__MACOSX // problem disabling eager execution on macOS (conflict with multiprocessing)
386 if (fUseTFKeras){
387 PyRunString("tf.compat.v1.disable_eager_execution()","Failed to disable eager execution");
388 Log() << kINFO << "Disabled TF eager execution when evaluating model " << Endl;
389 }
390#endif
391
// The argument is SetupKerasModel's `loadTrainedModel` flag.
392 SetupKerasModel(true);
393
394 // Init evaluation (needed for getMvaValue)
395 if (fNVars > 0) {
396 fVals.resize(fNVars); // holds values used for classification and regression
400 }
401 // Set up the output variables
402 if (fNOutputs > 0) {
403 fOutput.resize(fNOutputs); // holds classification probabilities or regression output
407 }
408
410}
411
412/// Initialization function called from MethodBase::SetupMethod()
413/// Note that the option strings are not yet filled with their values.
414/// This is done before the ProcessOptions method or after reading from an XML file
416
418
419 if (!PyIsInitialized()) {
420 Log() << kFATAL << "Python is not initialized" << Endl;
421 }
422 _import_array(); // required to use numpy arrays
423
424 // NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks
425 PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");
426
427 // Set flag that model is not setup
428 fModelIsSetup = false;
429 fModelIsSetupForEval = false;
430}
431
433
434 if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;
435
436 /*
437 * Load training data to numpy array
438 */
439
443
444 Log() << kINFO << "Split TMVA training data in " << nTrainingEvents << " training events and "
445 << nValEvents << " validation events" << Endl;
446
447 float* trainDataX = new float[nTrainingEvents*fNVars];
448 float* trainDataY = new float[nTrainingEvents*fNOutputs];
449 float* trainDataWeights = new float[nTrainingEvents];
450 for (UInt_t i=0; i<nTrainingEvents; i++) {
451 const TMVA::Event* e = GetTrainingEvent(i);
452 // Fill variables
453 for (UInt_t j=0; j<fNVars; j++) {
454 trainDataX[j + i*fNVars] = e->GetValue(j);
455 }
456 // Fill targets
457 // NOTE: For classification, convert class number in one-hot vector,
458 // e.g., 1 -> [0, 1] or 0 -> [1, 0] for binary classification
460 for (UInt_t j=0; j<fNOutputs; j++) {
461 trainDataY[j + i*fNOutputs] = 0;
462 }
463 trainDataY[e->GetClass() + i*fNOutputs] = 1;
464 }
465 else if (GetAnalysisType() == Types::kRegression) {
466 for (UInt_t j=0; j<fNOutputs; j++) {
467 trainDataY[j + i*fNOutputs] = e->GetTarget(j);
468 }
469 }
470 else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
471 // Fill weights
472 // NOTE: If no weight branch is given, this defaults to ones for all events
473 trainDataWeights[i] = e->GetWeight();
474 }
475
485
486 /*
487 * Load validation data to numpy array
488 */
489
490 // NOTE: from TMVA, we get the validation data as a subset of all the training data
491 // we will not use test data for validation. They will be used for the real testing
492
493
494 float* valDataX = new float[nValEvents*fNVars];
495 float* valDataY = new float[nValEvents*fNOutputs];
496 float* valDataWeights = new float[nValEvents];
497 // validation events follow the training ones in the TMVA training vector
498 for (UInt_t i=0; i< nValEvents ; i++) {
499 UInt_t ievt = nTrainingEvents + i; // TMVA event index
501 // Fill variables
502 for (UInt_t j=0; j<fNVars; j++) {
503 valDataX[j + i*fNVars] = e->GetValue(j);
504 }
505 // Fill targets
507 for (UInt_t j=0; j<fNOutputs; j++) {
508 valDataY[j + i*fNOutputs] = 0;
509 }
510 valDataY[e->GetClass() + i*fNOutputs] = 1;
511 }
512 else if (GetAnalysisType() == Types::kRegression) {
513 for (UInt_t j=0; j<fNOutputs; j++) {
514 valDataY[j + i*fNOutputs] = e->GetTarget(j);
515 }
516 }
517 else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
518 // Fill weights
519 valDataWeights[i] = e->GetWeight();
520 }
521
531
532 /*
533 * Train Keras model
534 */
535 Log() << kINFO << "Training Model Summary" << Endl;
536 PyRunString("model.summary()");
537
538 // Setup parameters
539
542 PyObject* pVerbose = PyLong_FromLong(fVerbose);
545 PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
546
547 // Setup training callbacks
548 PyRunString("callbacks = []");
549
550 // Callback: Save only weights with smallest validation loss
551 if (fSaveBestOnly) {
552 PyRunString("callbacks.append(" + fKerasString +".callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
553 Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;
554 }
555
556 // Callback: Stop training early if no improvement in validation loss is observed
557 if (fTriesEarlyStopping>=0) {
559 tries.Form("%i", fTriesEarlyStopping);
560 PyRunString("callbacks.append(" + fKerasString + ".callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
561 Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " number of epochs with no improvement of validation loss" << Endl;
562 }
563
564 // Callback: Learning rate scheduler
565 if (fLearningRateSchedule != "") {
566 // tokenize learning rate schedule (e.g. "10,0.01;20,0.001" given as epoch,lr)
567 std::vector<std::pair<std::string, std::string>> scheduleSteps;
569 for (auto obj : *lrSteps) {
570 TString step = obj->GetName();
571 auto x = step.Tokenize(",");
572 if (!x || x->GetEntries() != 2) {
573 Log() << kFATAL << "Invalid values given in LearningRateSchedule, it should be as \"10,0.1;20,0.01\""
574 << Endl;
575 }
576 scheduleSteps.push_back(std::make_pair<std::string, std::string>( std::string((*x)[0]->GetName() ) ,
577 std::string((*x)[1]->GetName() ) ) );
578 std::cout << " add learning rate schedule " << scheduleSteps.back().first << " : " << scheduleSteps.back().second << std::endl;
579 }
580 // Set scheduler function as piecewise function with given steps
581 TString epochsList = "epochs = [";
582 TString valuesList = "lrValues = [";
583 for (size_t i = 0; i < scheduleSteps.size(); i++) {
584 epochsList += TString(scheduleSteps[i].first.c_str());
585 valuesList += TString(scheduleSteps[i].second.c_str());
586 if (i < scheduleSteps.size()-1) {
587 epochsList += ", ";
588 valuesList += ", ";
589 }
590 }
591 epochsList += "]";
592 valuesList += "]";
593 TString scheduleFunction = "def schedule(epoch, lr):\n"
594 " i = 0\n"
595 " " + epochsList + "\n"
596 " " + valuesList + "\n"
597 " for e in epochs:\n"
598 " if (epoch < e) :\n"
599 " return lrValues[i]\n"
600 " i+=1\n"
601 " return lr\n";
603 "Failed to setup scheduler function with string: " + fLearningRateSchedule, Py_file_input);
604 // Setup callback
605 PyRunString("callbacks.append(" + fKerasString + ".callbacks.LearningRateScheduler(schedule, verbose=True))",
606 "Failed to setup training callback: LearningRateSchedule");
607 Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;
608 }
609
610 // Callback: TensorBoard
611 if (fTensorBoard != "") {
612 TString logdir = TString("'") + fTensorBoard + TString("'");
614 "callbacks.append(" + fKerasString + ".callbacks.TensorBoard(log_dir=" + logdir +
615 ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
616 "Failed to setup training callback: TensorBoard");
617 Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;
618 }
619
620 // Train model
621 PyRunString("history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
622 "Failed to train model");
623
624
625 std::vector<float> fHistory; // Hold training history (val_acc or loss etc)
626 fHistory.resize(fNumEpochs); // holds training loss or accuracy output
629 PyDict_SetItemString(fLocalNS, "HistoryOutput", (PyObject*)pHistory);
630
631 // Store training history data
632 Int_t iHis=0;
633 PyRunString("number_of_keys=len(history.history.keys())");
634 PyObject* PyNkeys=PyDict_GetItemString(fLocalNS, "number_of_keys");
636 for (iHis=0; iHis<nkeys; iHis++) {
637
638 PyRunString(TString::Format("copy_string=str(list(history.history.keys())[%d])",iHis));
640 if (!stra)
641 break;
643 PyObject* str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~");
644 const char *name = PyBytes_AsString(str);
645
646 Log() << kINFO << "Getting training history for item:" << iHis << " name = " << name << Endl;
647 PyRunString(TString::Format("for i,p in enumerate(history.history[%s]):\n HistoryOutput[i]=p\n",name),
648 TString::Format("Failed to get %s from training history",name));
649 for (size_t i=0; i<fHistory.size(); i++)
650 fTrainHistory.AddValue(name,i+1,fHistory[i]);
651
652 }
653//#endif
654
655 /*
656 * Store trained model to file (only if option 'SaveBestOnly' is NOT activated,
657 * because we do not want to override the best model checkpoint)
658 */
659
660 if (!fSaveBestOnly) {
661 PyRunString("model.save('"+fFilenameTrainedModel+"', overwrite=True)",
662 "Failed to save trained model: "+fFilenameTrainedModel);
663 Log() << kINFO << "Trained model written to file: " << fFilenameTrainedModel << Endl;
664 }
665
666 /*
667 * Clean-up
668 */
669
670 delete[] trainDataX;
671 delete[] trainDataY;
672 delete[] trainDataWeights;
673 delete[] valDataX;
674 delete[] valDataY;
675 delete[] valDataWeights;
676}
677
681
683 // Cannot determine error
685
686 // Check whether the model is setup
687 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
689 // Setup the trained model
691 }
692
693 // Get signal probability (called mvaValue here)
694 const TMVA::Event* e = GetEvent();
695 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
696 int verbose = (int) Verbose();
697 std::string code = "for i,p in enumerate(model.predict(vals, verbose=" + ROOT::Math::Util::ToString(verbose)
698 + ")): output[i]=p\n";
699 PyRunString(code,"Failed to get predictions");
700
702}
703
705 // Check whether the model is setup
706 // NOTE: Unfortunately this is needed because during evaluation ProcessOptions is not called again
708 // Setup the trained model
710 }
711
712 // Load data to numpy array
713 Long64_t nEvents = Data()->GetNEvents();
714 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
715 if (firstEvt < 0) firstEvt = 0;
716 nEvents = lastEvt-firstEvt;
717
718 // use timer
719 Timer timer( nEvents, GetName(), kTRUE );
720
721 if (logProgress)
722 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
723 << "Evaluation of " << GetMethodName() << " on "
724 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
725 << " sample (" << nEvents << " events)" << Endl;
726
727 float* data = new float[nEvents*fNVars];
728 for (UInt_t i=0; i<nEvents; i++) {
729 Data()->SetCurrentEvent(i);
730 const TMVA::Event *e = GetEvent();
731 for (UInt_t j=0; j<fNVars; j++) {
732 data[j + i*fNVars] = e->GetValue(j);
733 }
734 }
735
736 std::vector<double> mvaValues(nEvents);
737 npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)fNVars};
739 if (pDataMvaValues==0) Log() << "Failed to load data to Python array" << Endl;
740
741 // Get prediction for all events
743 if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;
745 if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;
746 delete[] data;
747 // Load predictions to double vector
748 // NOTE: The signal probability is given at the output
749 float* predictionsData = (float*) PyArray_DATA(pPredictions);
750
751 for (UInt_t i=0; i<nEvents; i++) {
753 }
754
755 if (logProgress) {
756 Log() << kINFO
757 << "Elapsed time for evaluation of " << nEvents << " events: "
758 << timer.GetElapsedTime() << " " << Endl;
759 }
760
761
762 return mvaValues;
763}
764
/// Evaluate the model on the current event and return the regression outputs.
///
/// Copies the current event's input values into fVals, runs a Python snippet
/// through PyRunString that calls `model.predict(vals, ...)` and stores each
/// prediction in `output[i]`, then writes fOutput into the targets of a copy
/// of the event and reads the targets of `eTrans2` back into fOutput.
///
/// @return reference to the member vector fOutput
///
/// NOTE(review): how the Python names `vals`/`output` are bound to
/// fVals/fOutput is not visible in this function -- confirm in the model setup.
/// NOTE(review): listing lines 768, 772 and 789 are absent from this extract;
/// line 789 presumably defines `eTrans2` via an inverse transformation -- confirm.
765std::vector<Float_t>& MethodPyKeras::GetRegressionValues() {
766 // Check whether the model is set up
767 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
769 // Set up the model and load weights
770 //std::cout << "setup model for evaluation" << std::endl;
771 //PyRunString("tf.compat.v1.disable_eager_execution()","Failed to disable eager execution");
773 }
774
775 // Get regression values
776 const TMVA::Event* e = GetEvent();
777 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
778 int verbose = (int) Verbose();
// Build the Python loop that stores each prediction in output[i].
779 std::string code = "for i,p in enumerate(model.predict(vals, verbose=" + ROOT::Math::Util::ToString(verbose)
780 + ")): output[i]=p\n";
781 PyRunString(code,"Failed to get predictions");
782
783 // Use inverse transformation of targets to get final regression values
// NOTE(review): eTrans is allocated with `new` and no matching `delete` is
// visible in this listing -- possible leak on every call; confirm.
784 Event * eTrans = new Event(*e);
785 for (UInt_t i=0; i<fNOutputs; ++i) {
786 eTrans->SetTarget(i,fOutput[i]);
787 }
788
790 for (UInt_t i=0; i<fNOutputs; ++i) {
791 fOutput[i] = eTrans2->GetTarget(i);
792 }
793
794 return fOutput;
795}
796
/// Evaluate the model on the current event and return the per-class outputs.
///
/// Copies the current event's input values into fVals, then runs a Python
/// snippet through PyRunString that calls `model.predict(vals, ...)` and
/// stores each prediction in `output[i]`.
///
/// @return reference to the member vector fOutput
///
/// NOTE(review): how the Python names `vals`/`output` are bound to
/// fVals/fOutput is not visible in this function -- confirm in the model setup.
/// NOTE(review): listing lines 800 and 802 are absent from this extract.
797std::vector<Float_t>& MethodPyKeras::GetMulticlassValues() {
798 // Check whether the model is set up
799 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
801 // Set up the model and load weights
803 }
804
805 // Get class probabilities
806 const TMVA::Event* e = GetEvent();
807 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
808 int verbose = (int) Verbose();
// Build the Python loop that stores each prediction in output[i].
809 std::string code = "for i,p in enumerate(model.predict(vals, verbose=" + ROOT::Math::Util::ToString(verbose)
810 + ")): output[i]=p\n";
811 PyRunString(code,"Failed to get predictions");
812
813 return fOutput;
814}
815
818
820// typical length of text line:
821// "|--------------------------------------------------------------|"
822 Log() << Endl;
823 Log() << "Keras is a high-level API for the Theano and Tensorflow packages." << Endl;
824 Log() << "This method wraps the training and predictions steps of the Keras" << Endl;
825 Log() << "Python package for TMVA, so that dataloading, preprocessing and" << Endl;
826 Log() << "evaluation can be done within the TMVA system. To use this Keras" << Endl;
827 Log() << "interface, you have to generate a model with Keras first. Then," << Endl;
828 Log() << "this model can be loaded and trained in TMVA." << Endl;
829 Log() << Endl;
830}
831
833 // Get the Keras backend.
// Each probe below runs a Python comparison through PyRunString and then reads
// the resulting `keras_backend_is_set` entry back from fLocalNS.
// NOTE(review): the signature line (listing line 832) is absent from this extract.
834
835 // When tf.keras is used, the backend is TensorFlow
836 if (UseTFKeras()) return kTensorFlow;
837
838 // Check first whether the TensorFlow backend is in use
839 PyRunString("keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
840 PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
841 if (keras_backend != nullptr && keras_backend == Py_True)
842 return kTensorFlow;
843
// Otherwise check for the Theano backend
844 PyRunString("keras_backend_is_set = keras.backend.backend() == \"theano\"");
845 keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
846 if (keras_backend != nullptr && keras_backend == Py_True)
847 return kTheano;
848
// Finally check for the CNTK backend
849 PyRunString("keras_backend_is_set = keras.backend.backend() == \"cntk\"");
850 keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
851 if (keras_backend != nullptr && keras_backend == Py_True)
852 return kCNTK;
853
// None of the probed backends matched
854 return kUndefined;
855}
856
858 // get the keras backend name
860 if (type == kTensorFlow) return "TensorFlow";
861 if (type == kTheano) return "Theano";
862 if (type == kCNTK) return "CNTK";
863 return "Undefined";
864}
#define PyBytes_AsString
Definition CPyCppyy.h:64
#define REGISTER_METHOD(CLASS)
for example
_object PyObject
#define Py_single_input
#define e(i)
Definition RSha256.hxx:103
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
#define ClassImp(name)
Definition Rtypes.h:376
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2496
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
MsgLogger & Log() const
Class that contains all the data information.
Definition DataSetInfo.h:62
UInt_t GetNClasses() const
UInt_t GetNTargets() const
Types::ETreeType GetCurrentType() const
Definition DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:206
Long64_t GetNTrainingEvents() const
Definition DataSet.h:68
void SetCurrentEvent(Long64_t ievt) const
Definition DataSet.h:88
const char * GetName() const override
Definition MethodBase.h:334
Bool_t Verbose() const
Definition MethodBase.h:503
Types::EAnalysisType GetAnalysisType() const
Definition MethodBase.h:437
const TString & GetWeightFileDir() const
Definition MethodBase.h:492
const TString & GetMethodName() const
Definition MethodBase.h:331
const Event * GetEvent() const
Definition MethodBase.h:751
DataSetInfo & DataInfo() const
Definition MethodBase.h:410
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
Definition MethodBase.h:345
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition MethodBase.h:394
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
TrainingHistory fTrainHistory
Definition MethodBase.h:425
DataSet * Data() const
Definition MethodBase.h:409
const Event * GetTrainingEvent(Long64_t ievt) const
Definition MethodBase.h:771
void GetHelpMessage() const override
void ProcessOptions() override
Function processing the options This is called only when creating the method before training not when...
std::vector< float > fOutput
std::vector< Float_t > & GetRegressionValues() override
Bool_t UseTFKeras() const
EBackendType
enumeration defining the used Keras backend
void SetupKerasModel(Bool_t loadTrainedModel)
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper) override
void DeclareOptions() override
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
void TestClassification() override
initialization
void SetupKerasModelForEval()
Setting up model for evaluation Add here some needed optimizations like disabling eager execution.
std::vector< float > fVals
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) override
get all the MVA values for the events of the current Data type
MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
void Train() override
TString fLearningRateSchedule
std::vector< Float_t > & GetMulticlassValues() override
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) override
void Init() override
Initialization function called from MethodBase::SetupMethod() Note that option string are not yet fil...
void ReadModelFromFile() override
EBackendType GetKerasBackend()
Get the Keras backend (can be: TensorFlow, Theano or CNTK)
Virtual base class for all TMVA method based on Python.
static int PyIsInitialized()
Check Python interpreter initialization status.
static PyObject * fGlobalNS
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=256)
Execute Python code from string.
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
void AddValue(TString Property, Int_t stage, Double_t value)
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Definition Types.h:135
@ kMulticlass
Definition Types.h:129
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
An array of TObjects.
Definition TObjArray.h:31
Basic string class.
Definition TString.h:138
@ kTrailing
Definition TString.h:284
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition TString.cxx:2271
Bool_t IsNull() const
Definition TString.h:422
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2385
virtual const char * Getenv(const char *env)
Get environment variable.
Definition TSystem.cxx:1678
Double_t x[n]
Definition legend1.C:17
std::string ToString(const T &val)
Utility function for conversion to strings.
Definition Util.h:64
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148