Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
MethodPyKeras.cxx
Go to the documentation of this file.
1// @(#)root/tmva/pymva $Id$
2// Author: Stefan Wunsch, 2016
3
4#include <Python.h>
6
7#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
8#include <numpy/arrayobject.h>
9
10#include "TMVA/Types.h"
11#include "TMVA/Config.h"
13#include "TMVA/Results.h"
16#include "TMVA/Tools.h"
17#include "TMVA/Timer.h"
18#include "TSystem.h"
19#include "Math/Util.h"
20
21using namespace TMVA;
22
23namespace TMVA {
24namespace Internal {
25class PyGILRAII {
26 PyGILState_STATE m_GILState;
27
28public:
31};
32} // namespace Internal
33} // namespace TMVA
34
35REGISTER_METHOD(PyKeras)
36
38
40 : PyMethodBase(jobName, Types::kPyKeras, methodTitle, dsi, theOption) {
41 fNumEpochs = 10;
42 fNumThreads = 0;
43 fBatchSize = 100;
44 fVerbose = 1;
45 fContinueTraining = false;
46 fSaveBestOnly = true;
48 fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
49 fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
50 fTensorBoard = ""; // empty string deactivates TensorBoard callback
51}
52
55 fNumEpochs = 10;
56 fNumThreads = 0;
57 fBatchSize = 100;
58 fVerbose = 1;
59 fContinueTraining = false;
60 fSaveBestOnly = true;
62 fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
63 fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
64 fTensorBoard = ""; // empty string deactivates TensorBoard callback
65}
66
69
76
77///////////////////////////////////////////////////////////////////////////////
78
80 DeclareOptionRef(fFilenameModel, "FilenameModel", "Filename of the initial Keras model");
81 DeclareOptionRef(fFilenameTrainedModel, "FilenameTrainedModel", "Filename of the trained output Keras model");
82 DeclareOptionRef(fBatchSize, "BatchSize", "Training batch size");
83 DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");
84 DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)");
85 DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth");
86 DeclareOptionRef(fUseTFKeras, "tf.keras", "Use tensorflow from Keras");
87 DeclareOptionRef(fUseTFKeras, "tfkeras", "Use tensorflow from Keras");
88 DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training");
89 DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");
90 DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");
91 DeclareOptionRef(fTriesEarlyStopping, "TriesEarlyStopping", "Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
92 DeclareOptionRef(fLearningRateSchedule, "LearningRateSchedule", "Set new learning rate during training at specific epochs, e.g., \"50,0.01;70,0.005\"");
93 DeclareOptionRef(fTensorBoard, "TensorBoard",
94 "Write a log during training to visualize and monitor the training performance with TensorBoard");
95
96 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
97 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
98 "Specify as 100 to use exactly 100 events. (Default: 20%)");
99 DeclareOptionRef(fUserCodeName = "", "UserCode",
100 "Optional python code provided by the user to be executed before loading the Keras model");
101}
102
103////////////////////////////////////////////////////////////////////////////////
104/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
105/// 100 etc.
106/// - 20% and 0.2 selects 20% of the training set as validation data.
107/// - 100 selects 100 events as the validation data.
108///
109/// @return number of samples in validation set
110///
112{
114 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
115
116 // Parsing + Validation
117 // --------------------
118 if (fNumValidationString.EndsWith("%")) {
119 // Relative spec. format 20%
120 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
121
122 if (intValStr.IsFloat()) {
123 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
124 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
125 } else {
126 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
127 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
128 }
129 } else if (fNumValidationString.IsFloat()) {
130 Double_t valSizeAsDouble = fNumValidationString.Atof();
131
132 if (valSizeAsDouble < 1.0) {
133 // Relative spec. format 0.2
134 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
135 } else {
136 // Absolute spec format 100 or 100.0
138 }
139 } else {
140 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
141 << Endl;
142 }
143
144 // Value validation
145 // ----------------
146 if (nValidationSamples < 0) {
147 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
148 }
149
150 if (nValidationSamples == 0) {
151 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
152 }
153
155 Log() << kFATAL << "Validation size \"" << fNumValidationString
156 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
157 }
158
159 return nValidationSamples;
160}
161
162/// Function processing the options.
163/// This is called only when creating the method before training, not when
164/// reading from an XML file. It is called from MethodBase::ProcessSetup,
165/// which in turn is called from Factory::BookMethod.
167
168 // Set default filename for trained model if option is not used
170 fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5";
171 }
172
173 InitKeras();
174
175 // Setup model, either the initial model from `fFilenameModel` or
176 // the trained model from `fFilenameTrainedModel`
177 if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
179}
180
182 // Initialize Keras first. This is done only here, when the class has
183 // all its state variables set from options or read from an XML file
184 // Import Keras
185
186 if (fUseTFKeras)
187 Log() << kINFO << "Setting up tf.keras" << Endl;
188 else
189 Log() << kINFO << "Setting up keras with " << gSystem->Getenv("KERAS_BACKEND") << " backend" << Endl;
190
191 bool useTFBackend = kFALSE;
193 bool kerasIsPresent = kFALSE;
194
195 if (!fUseTFKeras) {
196 auto ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fLocalNS);
197 // need importing also in global namespace
198 if (ret != nullptr) ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fGlobalNS);
199 if (ret != nullptr)
201 if (kerasIsPresent) {
202 // check compatibility with tensorflow
203 if (GetKerasBackend() == kTensorFlow ) {
205
206 PyRunString("keras_major_version = int(keras.__version__.split('.')[0])");
207 PyRunString("keras_minor_version = int(keras.__version__.split('.')[1])");
212 Log() << kINFO << "Using Keras version " << kerasMajorVersion << "." << kerasMinorVersion << Endl;
213 // only version 2.3 is latest multi-backend version.
214 // version 2.4 is just tf.keras and should not be used in standalone and will not work in this workflow
215 // see https://github.com/keras-team/keras/releases/tag/2.4.0
216 // for example variable keras.backend.tensorflow_backend will not exist anymore in keras 2.4
218
219 }
220 } else {
221 // Keras is not found. Try to use tf.keras
222 Log() << kINFO << "Keras is not found. Trying using tf.keras" << Endl;
223 fUseTFKeras = 1;
224 }
225 }
226
227 // import TensorFlow (if requested or because it is the Keras backend)
228 if (fUseTFKeras || useTFBackend) {
229 auto ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fLocalNS);
230 if (ret != nullptr) ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fGlobalNS);
231 if (ret == nullptr) {
232 Log() << kFATAL << "Importing TensorFlow failed" << Endl;
233 }
234 // check tensorflow version
235 PyRunString("tf_major_version = int(tf.__version__.split('.')[0])");
236 PyObject *pyTfVersion = PyDict_GetItemString(fLocalNS, "tf_major_version");
238 Log() << kINFO << "Using TensorFlow version " << tfVersion << Endl;
239
240 if (tfVersion < 2) {
241 if (fUseTFKeras == 1) {
242 Log() << kWARNING << "Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" << Endl;
244 // case when Keras was not found
245 if (!kerasIsPresent) {
246 Log() << kFATAL << "Keras is not present and not a suitable TensorFlow version is found " << Endl;
247 return;
248 }
249 }
250 }
251 else {
252 // using version larger than 2.0 - can use tf.keras
253 if (!kerasIsCompatible) {
254 Log() << kWARNING << "The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" << Endl;
255 fUseTFKeras = 1;
256 }
257 }
258
259 // if keras 2.3 and tensorflow 2 are found. Use tf.keras or keras ?
260 // at the moment default is tf.keras=false to keep compatibility
261 // but this might change in future releases
262 if (fUseTFKeras) {
263 Log() << kINFO << "Use Keras version from TensorFlow : tf.keras" << Endl;
264 fKerasString = "tf.keras";
265 PyRunString("K = tf.keras.backend");
266 PyRun_String("K = tf.keras.backend", Py_single_input, fGlobalNS, fGlobalNS);
267 }
268 else {
269 Log() << kINFO << "Use TensorFlow as Keras backend" << Endl;
270 fKerasString = "keras";
271 PyRunString("from keras.backend import tensorflow_backend as K");
272 PyRun_String("from keras.backend import tensorflow_backend as K", Py_single_input, fGlobalNS, fGlobalNS);
273 }
274
275 // extra options for tensorflow
276 // use different naming in tf2 for ConfigProto and Session
277 TString configProto = (tfVersion >= 2) ? "tf.compat.v1.ConfigProto" : "tf.ConfigProto";
278 TString session = (tfVersion >= 2) ? "tf.compat.v1.Session" : "tf.Session";
279
280 // set the number of threads, in case it has been specified
282 if (num_threads > 0) {
283 Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl;
284
286 TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
288 } else
289 PyRunString(TString::Format("session_conf = %s()", configProto.Data()));
290
291 // applying GPU options such as allow_growth=True to avoid allocating all memory on GPU
292 // that prevents running later TMVA-GPU
293 // Also new Nvidia RTX cards (e.g. RTX 2070) require this option
294 if (!fGpuOptions.IsNull()) {
296 for (int item = 0; item < optlist->GetEntries(); ++item) {
297 Log() << kINFO << "Applying GPU option: gpu_options." << optlist->At(item)->GetName() << Endl;
298 PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName()));
299 }
300 }
301 PyRunString(TString::Format("sess = %s(config=session_conf)", session.Data()));
302
303 if (tfVersion < 2) {
304 PyRunString("K.set_session(sess)");
305 } else {
306 PyRunString("tf.compat.v1.keras.backend.set_session(sess)");
307 }
308 }
309 // case not using a Tensorflow backend
310 else {
311 fKerasString = "keras";
312 if (fNumThreads > 0)
313 Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend"
314 << Endl;
315 if (!fGpuOptions.IsNull()) {
316 Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions
317 << " when not using tensorflow as backend" << Endl;
318 }
319 }
320
321}
322
324 /*
325 * Load Keras model from file
326 */
327
328 Log() << kINFO << " Loading Keras Model " << Endl;
329
330 PyRunString("load_model_custom_objects=None");
331
332
333
334 if (!fUserCodeName.IsNull()) {
335 Log() << kINFO << " Executing user initialization code from " << fUserCodeName << Endl;
336
337
338 // run some python code provided by user for model initialization if needed
339 TString cmd = "exec(open('" + fUserCodeName + "').read())";
340 TString errmsg = "Error executing the provided user code";
342
343 PyRunString("print('custom objects for loading model : ',load_model_custom_objects)");
344 }
345
346 // Load initial model or already trained model
348 if (loadTrainedModel) {
350 }
351 else {
353 }
354
355 PyRunString("model = " + fKerasString + ".models.load_model('" + filenameLoadModel +
356 "', custom_objects=load_model_custom_objects)", "Failed to load Keras model from file: " + filenameLoadModel);
357
358 Log() << kINFO << "Loaded model from file: " << filenameLoadModel << Endl;
359
360
361 /*
362 * Init variables and weights
363 */
364
365 // Get variables, classes and target numbers
369 else Log() << kFATAL << "Selected analysis type is not implemented" << Endl;
370
371 // Mark the model as setup
372 fModelIsSetup = true;
373 fModelIsSetupForEval = false;
374}
375
376///Setting up model for evaluation
377/// Add here some needed optimizations like disabling eager execution
379
380 InitKeras();
381
382 // disable eager execution (model will evaluate > 100x faster)
383 // this needs to be done before loading the model
384#ifndef R__MACOSX // problem disabling eager execution on macOS (conflict with multiprocessing)
385 if (fUseTFKeras){
386 PyRunString("tf.compat.v1.disable_eager_execution()","Failed to disable eager execution");
387 Log() << kINFO << "Disabled TF eager execution when evaluating model " << Endl;
388 }
389#endif
390
391 SetupKerasModel(true);
392
393 // Init evaluation (needed for getMvaValue)
394 if (fNVars > 0) {
395 fVals.resize(fNVars); // holds values used for classification and regression
399 }
400 // setup output variables
401 if (fNOutputs > 0) {
402 fOutput.resize(fNOutputs); // holds classification probabilities or regression output
406 }
407
409}
410
411/// Initialization function called from MethodBase::SetupMethod()
412/// Note that the option strings are not yet filled with their values.
413/// This is done before the ProcessOptions method or after reading from an XML file
415
417
418 if (!PyIsInitialized()) {
419 Log() << kFATAL << "Python is not initialized" << Endl;
420 }
421 _import_array(); // required to use numpy arrays
422
423 // NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks
424 PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");
425
426 // Set flag that model is not setup
427 fModelIsSetup = false;
428 fModelIsSetupForEval = false;
429}
430
432
433 if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;
434
435 /*
436 * Load training data to numpy array
437 */
438
442
443 Log() << kINFO << "Split TMVA training data in " << nTrainingEvents << " training events and "
444 << nValEvents << " validation events" << Endl;
445
446 float* trainDataX = new float[nTrainingEvents*fNVars];
447 float* trainDataY = new float[nTrainingEvents*fNOutputs];
448 float* trainDataWeights = new float[nTrainingEvents];
449 for (UInt_t i=0; i<nTrainingEvents; i++) {
450 const TMVA::Event* e = GetTrainingEvent(i);
451 // Fill variables
452 for (UInt_t j=0; j<fNVars; j++) {
453 trainDataX[j + i*fNVars] = e->GetValue(j);
454 }
455 // Fill targets
456 // NOTE: For classification, convert class number in one-hot vector,
457 // e.g., 1 -> [0, 1] or 0 -> [1, 0] for binary classification
459 for (UInt_t j=0; j<fNOutputs; j++) {
460 trainDataY[j + i*fNOutputs] = 0;
461 }
462 trainDataY[e->GetClass() + i*fNOutputs] = 1;
463 }
464 else if (GetAnalysisType() == Types::kRegression) {
465 for (UInt_t j=0; j<fNOutputs; j++) {
466 trainDataY[j + i*fNOutputs] = e->GetTarget(j);
467 }
468 }
469 else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
470 // Fill weights
471 // NOTE: If no weight branch is given, this defaults to ones for all events
472 trainDataWeights[i] = e->GetWeight();
473 }
474
484
485 /*
486 * Load validation data to numpy array
487 */
488
489 // NOTE: from TMVA, we get the validation data as a subset of all the training data
490 // we will not use test data for validation. They will be used for the real testing
491
492
493 float* valDataX = new float[nValEvents*fNVars];
494 float* valDataY = new float[nValEvents*fNOutputs];
495 float* valDataWeights = new float[nValEvents];
496 // validation events follow the training ones in the TMVA training vector
497 for (UInt_t i=0; i< nValEvents ; i++) {
498 UInt_t ievt = nTrainingEvents + i; // TMVA event index
500 // Fill variables
501 for (UInt_t j=0; j<fNVars; j++) {
502 valDataX[j + i*fNVars] = e->GetValue(j);
503 }
504 // Fill targets
506 for (UInt_t j=0; j<fNOutputs; j++) {
507 valDataY[j + i*fNOutputs] = 0;
508 }
509 valDataY[e->GetClass() + i*fNOutputs] = 1;
510 }
511 else if (GetAnalysisType() == Types::kRegression) {
512 for (UInt_t j=0; j<fNOutputs; j++) {
513 valDataY[j + i*fNOutputs] = e->GetTarget(j);
514 }
515 }
516 else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
517 // Fill weights
518 valDataWeights[i] = e->GetWeight();
519 }
520
530
531 /*
532 * Train Keras model
533 */
534 Log() << kINFO << "Training Model Summary" << Endl;
535 PyRunString("model.summary()");
536
537 // Setup parameters
538
541 PyObject* pVerbose = PyLong_FromLong(fVerbose);
544 PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
545
546 // Setup training callbacks
547 PyRunString("callbacks = []");
548
549 // Callback: Save only weights with smallest validation loss
550 if (fSaveBestOnly) {
551 PyRunString("callbacks.append(" + fKerasString +".callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
552 Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;
553 }
554
555 // Callback: Stop training early if no improvement in validation loss is observed
556 if (fTriesEarlyStopping>=0) {
558 tries.Form("%i", fTriesEarlyStopping);
559 PyRunString("callbacks.append(" + fKerasString + ".callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
560 Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " number of epochs with no improvement of validation loss" << Endl;
561 }
562
563 // Callback: Learning rate scheduler
564 if (fLearningRateSchedule!="") {
565 // Setup a python dictionary with the desired learning rate steps
566 PyRunString("strScheduleSteps = '"+fLearningRateSchedule+"'\n"
567 "schedulerSteps = {}\n"
568 "for c in strScheduleSteps.split(';'):\n"
569 " x = c.split(',')\n"
570 " schedulerSteps[int(x[0])] = float(x[1])\n",
571 "Failed to setup steps for scheduler function from string: "+fLearningRateSchedule,
573 // Set scheduler function as piecewise function with given steps
574 PyRunString("def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n"
575 " if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n"
576 " else: return float(model.optimizer.lr.get_value())\n",
577 "Failed to setup scheduler function with string: "+fLearningRateSchedule,
579 // Setup callback
580 PyRunString("callbacks.append(" + fKerasString + ".callbacks.LearningRateScheduler(schedule))",
581 "Failed to setup training callback: LearningRateSchedule");
582 Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;
583 }
584
585 // Callback: TensorBoard
586 if (fTensorBoard != "") {
587 TString logdir = TString("'") + fTensorBoard + TString("'");
589 "callbacks.append(" + fKerasString + ".callbacks.TensorBoard(log_dir=" + logdir +
590 ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
591 "Failed to setup training callback: TensorBoard");
592 Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;
593 }
594
595 // Train model
596 PyRunString("history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
597 "Failed to train model");
598
599
600 std::vector<float> fHistory; // Hold training history (val_acc or loss etc)
601 fHistory.resize(fNumEpochs); // holds training loss or accuracy output
604 PyDict_SetItemString(fLocalNS, "HistoryOutput", (PyObject*)pHistory);
605
606 // Store training history data
607 Int_t iHis=0;
608 PyRunString("number_of_keys=len(history.history.keys())");
609 PyObject* PyNkeys=PyDict_GetItemString(fLocalNS, "number_of_keys");
611 for (iHis=0; iHis<nkeys; iHis++) {
612
613 PyRunString(TString::Format("copy_string=str(list(history.history.keys())[%d])",iHis));
615 if (!stra)
616 break;
618 PyObject* str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~");
619 const char *name = PyBytes_AsString(str);
620
621 Log() << kINFO << "Getting training history for item:" << iHis << " name = " << name << Endl;
622 PyRunString(TString::Format("for i,p in enumerate(history.history[%s]):\n HistoryOutput[i]=p\n",name),
623 TString::Format("Failed to get %s from training history",name));
624 for (size_t i=0; i<fHistory.size(); i++)
625 fTrainHistory.AddValue(name,i+1,fHistory[i]);
626
627 }
628//#endif
629
630 /*
631 * Store trained model to file (only if option 'SaveBestOnly' is NOT activated,
632 * because we do not want to override the best model checkpoint)
633 */
634
635 if (!fSaveBestOnly) {
636 PyRunString("model.save('"+fFilenameTrainedModel+"', overwrite=True)",
637 "Failed to save trained model: "+fFilenameTrainedModel);
638 Log() << kINFO << "Trained model written to file: " << fFilenameTrainedModel << Endl;
639 }
640
641 /*
642 * Clean-up
643 */
644
645 delete[] trainDataX;
646 delete[] trainDataY;
647 delete[] trainDataWeights;
648 delete[] valDataX;
649 delete[] valDataY;
650 delete[] valDataWeights;
651}
652
656
658 // Cannot determine error
660
661 // Check whether the model is setup
662 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
664 // Setup the trained model
666 }
667
668 // Get signal probability (called mvaValue here)
669 const TMVA::Event* e = GetEvent();
670 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
671 int verbose = (int) Verbose();
672 std::string code = "for i,p in enumerate(model.predict(vals, verbose=" + ROOT::Math::Util::ToString(verbose)
673 + ")): output[i]=p\n";
674 PyRunString(code,"Failed to get predictions");
675
677}
678
680 // Check whether the model is setup
681 // NOTE: Unfortunately this is needed because during evaluation ProcessOptions is not called again
683 // Setup the trained model
685 }
686
687 // Load data to numpy array
688 Long64_t nEvents = Data()->GetNEvents();
689 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
690 if (firstEvt < 0) firstEvt = 0;
691 nEvents = lastEvt-firstEvt;
692
693 // use timer
694 Timer timer( nEvents, GetName(), kTRUE );
695
696 if (logProgress)
697 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
698 << "Evaluation of " << GetMethodName() << " on "
699 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
700 << " sample (" << nEvents << " events)" << Endl;
701
702 float* data = new float[nEvents*fNVars];
703 for (UInt_t i=0; i<nEvents; i++) {
704 Data()->SetCurrentEvent(i);
705 const TMVA::Event *e = GetEvent();
706 for (UInt_t j=0; j<fNVars; j++) {
707 data[j + i*fNVars] = e->GetValue(j);
708 }
709 }
710
711 std::vector<double> mvaValues(nEvents);
712 npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)fNVars};
714 if (pDataMvaValues==0) Log() << "Failed to load data to Python array" << Endl;
715
716 // Get prediction for all events
718 if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;
720 if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;
721 delete[] data;
722 // Load predictions to double vector
723 // NOTE: The signal probability is given at the output
724 float* predictionsData = (float*) PyArray_DATA(pPredictions);
725
726 for (UInt_t i=0; i<nEvents; i++) {
728 }
729
730 if (logProgress) {
731 Log() << kINFO
732 << "Elapsed time for evaluation of " << nEvents << " events: "
733 << timer.GetElapsedTime() << " " << Endl;
734 }
735
736
737 return mvaValues;
738}
739
740std::vector<Float_t>& MethodPyKeras::GetRegressionValues() {
741 // Check whether the model is setup
742 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
744 // Setup the model and load weights
745 //std::cout << "setup model for evaluation" << std::endl;
746 //PyRunString("tf.compat.v1.disable_eager_execution()","Failed to disable eager execution");
748 }
749
750 // Get regression values
751 const TMVA::Event* e = GetEvent();
752 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
753 int verbose = (int) Verbose();
754 std::string code = "for i,p in enumerate(model.predict(vals, verbose=" + ROOT::Math::Util::ToString(verbose)
755 + ")): output[i]=p\n";
756 PyRunString(code,"Failed to get predictions");
757
758 // Use inverse transformation of targets to get final regression values
759 Event * eTrans = new Event(*e);
760 for (UInt_t i=0; i<fNOutputs; ++i) {
761 eTrans->SetTarget(i,fOutput[i]);
762 }
763
765 for (UInt_t i=0; i<fNOutputs; ++i) {
766 fOutput[i] = eTrans2->GetTarget(i);
767 }
768
769 return fOutput;
770}
771
772std::vector<Float_t>& MethodPyKeras::GetMulticlassValues() {
773 // Check whether the model is setup
774 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
776 // Setup the model and load weights
778 }
779
780 // Get class probabilities
781 const TMVA::Event* e = GetEvent();
782 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
783 int verbose = (int) Verbose();
784 std::string code = "for i,p in enumerate(model.predict(vals, verbose=" + ROOT::Math::Util::ToString(verbose)
785 + ")): output[i]=p\n";
786 PyRunString(code,"Failed to get predictions");
787
788 return fOutput;
789}
790
793
795// typical length of text line:
796// "|--------------------------------------------------------------|"
797 Log() << Endl;
798 Log() << "Keras is a high-level API for the Theano and Tensorflow packages." << Endl;
799 Log() << "This method wraps the training and predictions steps of the Keras" << Endl;
800 Log() << "Python package for TMVA, so that dataloading, preprocessing and" << Endl;
801 Log() << "evaluation can be done within the TMVA system. To use this Keras" << Endl;
802 Log() << "interface, you have to generate a model with Keras first. Then," << Endl;
803 Log() << "this model can be loaded and trained in TMVA." << Endl;
804 Log() << Endl;
805}
806
808 // get the keras backend
809
810 // in case we use tf.keras backend is tensorflow
811 if (UseTFKeras()) return kTensorFlow;
812
813 // check first if using tensorflow backend
814 PyRunString("keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
815 PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
816 if (keras_backend != nullptr && keras_backend == Py_True)
817 return kTensorFlow;
818
819 PyRunString("keras_backend_is_set = keras.backend.backend() == \"theano\"");
820 keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
821 if (keras_backend != nullptr && keras_backend == Py_True)
822 return kTheano;
823
824 PyRunString("keras_backend_is_set = keras.backend.backend() == \"cntk\"");
825 keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
826 if (keras_backend != nullptr && keras_backend == Py_True)
827 return kCNTK;
828
829 return kUndefined;
830}
831
833 // get the keras backend name
835 if (type == kTensorFlow) return "TensorFlow";
836 if (type == kTheano) return "Theano";
837 if (type == kCNTK) return "CNTK";
838 return "Undefined";
839}
#define PyBytes_AsString
Definition CPyCppyy.h:64
#define REGISTER_METHOD(CLASS)
for example
_object PyObject
#define Py_single_input
#define e(i)
Definition RSha256.hxx:103
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
long long Long64_t
Definition RtypesCore.h:69
constexpr Bool_t kTRUE
Definition RtypesCore.h:93
#define ClassImp(name)
Definition Rtypes.h:374
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
MsgLogger & Log() const
Class that contains all the data information.
Definition DataSetInfo.h:62
UInt_t GetNClasses() const
UInt_t GetNTargets() const
Types::ETreeType GetCurrentType() const
Definition DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:206
Long64_t GetNTrainingEvents() const
Definition DataSet.h:68
void SetCurrentEvent(Long64_t ievt) const
Definition DataSet.h:88
Bool_t Verbose() const
Definition MethodBase.h:503
const char * GetName() const
Definition MethodBase.h:334
Types::EAnalysisType GetAnalysisType() const
Definition MethodBase.h:437
const TString & GetWeightFileDir() const
Definition MethodBase.h:492
const TString & GetMethodName() const
Definition MethodBase.h:331
const Event * GetEvent() const
Definition MethodBase.h:751
DataSetInfo & DataInfo() const
Definition MethodBase.h:410
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
Definition MethodBase.h:345
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition MethodBase.h:394
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
TrainingHistory fTrainHistory
Definition MethodBase.h:425
DataSet * Data() const
Definition MethodBase.h:409
const Event * GetTrainingEvent(Long64_t ievt) const
Definition MethodBase.h:771
void GetHelpMessage() const
void Init()
Initialization function called from MethodBase::SetupMethod() Note that option string are not yet fil...
std::vector< float > fOutput
virtual void TestClassification()
initialization
void ProcessOptions()
Function processing the options This is called only when creating the method before training not when...
Bool_t UseTFKeras() const
EBackendType
enumeration defining the used Keras backend
void SetupKerasModel(Bool_t loadTrainedModel)
std::vector< Float_t > & GetMulticlassValues()
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper)
void SetupKerasModelForEval()
Setting up model for evaluation Add here some needed optimizations like disabling eager execution.
std::vector< Float_t > & GetRegressionValues()
std::vector< float > fVals
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
TString fLearningRateSchedule
EBackendType GetKerasBackend()
Get the Keras backend (can be: TensorFlow, Theano or CNTK)
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
get all the MVA values for the events of the current Data type
Virtual base class for all TMVA method based on Python.
static int PyIsInitialized()
Check Python interpreter initialization status.
static PyObject * fGlobalNS
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=256)
Execute Python code from string.
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
void AddValue(TString Property, Int_t stage, Double_t value)
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Definition Types.h:135
@ kMulticlass
Definition Types.h:129
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
An array of TObjects.
Definition TObjArray.h:31
Basic string class.
Definition TString.h:139
@ kTrailing
Definition TString.h:276
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition TString.cxx:2264
Bool_t IsNull() const
Definition TString.h:414
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2378
virtual const char * Getenv(const char *env)
Get environment variable.
Definition TSystem.cxx:1677
std::string ToString(const T &val)
Utility function for conversion to strings.
Definition Util.h:64
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148