Logo ROOT   6.18/05
Reference Guide
MethodPyKeras.cxx
Go to the documentation of this file.
1// @(#)root/tmva/pymva $Id$
2// Author: Stefan Wunsch, 2016
3
4#include <Python.h>
6
7#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
8#include <numpy/arrayobject.h>
9
10#include "TMVA/Types.h"
11#include "TMVA/Config.h"
13#include "TMVA/Results.h"
16#include "TMVA/Tools.h"
17#include "TMVA/Timer.h"
18
19using namespace TMVA;
20
21namespace TMVA {
22namespace Internal {
23class PyGILRAII {
24 PyGILState_STATE m_GILState;
25
26public:
27 PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
28 ~PyGILRAII() { PyGILState_Release(m_GILState); }
29};
30} // namespace Internal
31} // namespace TMVA
32
33REGISTER_METHOD(PyKeras)
34
36
37MethodPyKeras::MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption)
38 : PyMethodBase(jobName, Types::kPyKeras, methodTitle, dsi, theOption) {
39 fNumEpochs = 10;
40 fBatchSize = 100;
41 fVerbose = 1;
42 fContinueTraining = false;
43 fSaveBestOnly = true;
45 fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
46 fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
47 fTensorBoard = ""; // empty string deactivates TensorBoard callback
48}
49
50MethodPyKeras::MethodPyKeras(DataSetInfo &theData, const TString &theWeightFile)
51 : PyMethodBase(Types::kPyKeras, theData, theWeightFile) {
52 fNumEpochs = 10;
53 fNumThreads = 0;
54 fBatchSize = 100;
55 fVerbose = 1;
56 fContinueTraining = false;
57 fSaveBestOnly = true;
59 fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
60 fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
61 fTensorBoard = ""; // empty string deactivates TensorBoard callback
62}
63
65}
66
68 if (type == Types::kRegression) return kTRUE;
69 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
70 if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
71 return kFALSE;
72}
73
74///////////////////////////////////////////////////////////////////////////////
75
77 DeclareOptionRef(fFilenameModel, "FilenameModel", "Filename of the initial Keras model");
78 DeclareOptionRef(fFilenameTrainedModel, "FilenameTrainedModel", "Filename of the trained output Keras model");
79 DeclareOptionRef(fBatchSize, "BatchSize", "Training batch size");
80 DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");
81 DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)");
82 DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth");
83 DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training");
84 DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");
85 DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");
86 DeclareOptionRef(fTriesEarlyStopping, "TriesEarlyStopping", "Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
87 DeclareOptionRef(fLearningRateSchedule, "LearningRateSchedule", "Set new learning rate during training at specific epochs, e.g., \"50,0.01;70,0.005\"");
88 DeclareOptionRef(fTensorBoard, "TensorBoard",
89 "Write a log during training to visualize and monitor the training performance with TensorBoard");
90 DeclareOptionRef(fTensorBoard, "TensorBoard",
91 "Write a log during training to visualize and monitor the training performance with TensorBoard");
92
93 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
94 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
95 "Specify as 100 to use exactly 100 events. (Default: 20%)");
96
97}
98
99
100////////////////////////////////////////////////////////////////////////////////
101/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
102/// 100 etc.
103/// - 20% and 0.2 selects 20% of the training set as validation data.
104/// - 100 selects 100 events as the validation data.
105///
106/// @return number of samples in validation set
107///
109{
110 Int_t nValidationSamples = 0;
111 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
112
113 // Parsing + Validation
114 // --------------------
115 if (fNumValidationString.EndsWith("%")) {
116 // Relative spec. format 20%
117 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
118
119 if (intValStr.IsFloat()) {
120 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
121 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
122 } else {
123 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
124 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
125 }
126 } else if (fNumValidationString.IsFloat()) {
127 Double_t valSizeAsDouble = fNumValidationString.Atof();
128
129 if (valSizeAsDouble < 1.0) {
130 // Relative spec. format 0.2
131 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
132 } else {
133 // Absolute spec format 100 or 100.0
134 nValidationSamples = valSizeAsDouble;
135 }
136 } else {
137 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
138 << Endl;
139 }
140
141 // Value validation
142 // ----------------
143 if (nValidationSamples < 0) {
144 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
145 }
146
147 if (nValidationSamples == 0) {
148 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
149 }
150
151 if (nValidationSamples >= (Int_t)trainingSetSize) {
152 Log() << kFATAL << "Validation size \"" << fNumValidationString
153 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
154 }
155
156 return nValidationSamples;
157}
158
160 // Set default filename for trained model if option is not used
162 fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5";
163 }
164
165 // set here some specific options for Tensorflow backend
166 // - when using tensorflow gpu set option to allow memory growth to avoid allocating all memory
167 // - set up number of threads for CPU if NumThreads option was specified
168
169 // check first if using tensorflow backend
170 if (GetKerasBackend() == kTensorFlow) {
171 Log() << kINFO << "Using TensorFlow backend - setting special configuration options " << Endl;
172 PyRunString("import tensorflow as tf");
173 PyRunString("from keras.backend import tensorflow_backend as K");
174 // in case specify number of threads
175 int num_threads = fNumThreads;
176 if (num_threads > 0) {
177 Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl;
178 PyRunString(TString::Format("session_conf = tf.ConfigProto(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
179 num_threads,num_threads));
180 }
181 else
182 PyRunString("session_conf = tf.ConfigProto()");
183
184 // applying GPU options such as allow_growth=True to avoid allocating all memory on GPU
185 // that prevents running later TMVA-GPU
186 // Also new Nvidia RTX cards (e.g. RTX 2070) require this option
187 if (!fGpuOptions.IsNull() ) {
188 TObjArray * optlist = fGpuOptions.Tokenize(",");
189 for (int item = 0; item < optlist->GetEntries(); ++item) {
190 Log() << kINFO << "Applying GPU option: gpu_options." << optlist->At(item)->GetName() << Endl;
191 PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName()));
192 }
193 }
194 PyRunString("sess = tf.Session(config=session_conf)");
195 PyRunString("K.set_session(sess)");
196 }
197 else {
198 if (fNumThreads > 0)
199 Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend" << Endl;
200 if (!fGpuOptions.IsNull() ) {
201 Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions << " when not using tensorflow as backend" << Endl;
202 }
203 }
204
205 // Setup model, either the initial model from `fFilenameModel` or
206 // the trained model from `fFilenameTrainedModel`
207 if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
209}
210
/// Load a Keras model from file into the Python variable `model` and register
/// the buffers `vals` and `output` in fLocalNS for later evaluation.
///
/// @param loadTrainedModel  if true the model is read from fFilenameTrainedModel,
///                          otherwise from fFilenameModel
///
/// On completion fModelIsSetup is set to true.
211void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
212 /*
213 * Load Keras model from file
214 */
215
216 // Load initial model or already trained model
217 TString filenameLoadModel;
218 if (loadTrainedModel) {
219 filenameLoadModel = fFilenameTrainedModel;
220 }
221 else {
222 filenameLoadModel = fFilenameModel;
223 }
 // The file name is pasted verbatim between single quotes into the Python statement.
224 PyRunString("model = keras.models.load_model('"+filenameLoadModel+"')",
225 "Failed to load Keras model from file: "+filenameLoadModel);
226 Log() << kINFO << "Load model from file: " << filenameLoadModel << Endl;
227
228 /*
229 * Init variables and weights
230 */
231
232 // Get variables, classes and target numbers
 // NOTE(review): this listing jumps from source line 232 to 236. The statements
 // that precede the `else` below (presumably the ones assigning fNVars and
 // fNOutputs per analysis type) are not visible here - confirm against the
 // original file.
236 else Log() << kFATAL << "Selected analysis type is not implemented" << Endl;
237
238 // Init evaluation (needed for getMvaValue)
 // `vals` is a 1 x fNVars float array created directly on top of fVals.
239 fVals = new float[fNVars]; // holds values used for classification and regression
240 npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)fNVars};
241 PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (void*)fVals);
242 PyDict_SetItemString(fLocalNS, "vals", (PyObject*)pVals);
243
 // `output` is a 1 x fNOutputs float array created directly on top of fOutput's storage.
244 fOutput.resize(fNOutputs); // holds classification probabilities or regression output
245 npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)fNOutputs};
246 PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (void*)&fOutput[0]);
247 PyDict_SetItemString(fLocalNS, "output", (PyObject*)pOutput);
 // NOTE(review): pVals and pOutput are not Py_DECREF'ed in this function after
 // being stored in fLocalNS, and fVals is allocated without releasing a previous
 // buffer - check whether this function can run more than once per object.
248
249 // Mark the model as setup
250 fModelIsSetup = true;
251}
252
254
255 TMVA::Internal::PyGILRAII raii;
256
257 if (!PyIsInitialized()) {
258 Log() << kFATAL << "Python is not initialized" << Endl;
259 }
260 _import_array(); // required to use numpy arrays
261
262 // Import Keras
263 // NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks
264 PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");
265 PyRunString("import keras", "Import Keras failed");
266
267 // Set flag that model is not setup
268 fModelIsSetup = false;
269}
270
272 if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;
273
274 /*
275 * Load training data to numpy array
276 */
277
278 UInt_t nAllEvents = Data()->GetNTrainingEvents();
279 UInt_t nValEvents = GetNumValidationSamples();
280 UInt_t nTrainingEvents = nAllEvents - nValEvents;
281
282 Log() << kINFO << "Split TMVA training data in " << nTrainingEvents << " training events and "
283 << nValEvents << " validation events" << Endl;
284
285 float* trainDataX = new float[nTrainingEvents*fNVars];
286 float* trainDataY = new float[nTrainingEvents*fNOutputs];
287 float* trainDataWeights = new float[nTrainingEvents];
288 for (UInt_t i=0; i<nTrainingEvents; i++) {
289 const TMVA::Event* e = GetTrainingEvent(i);
290 // Fill variables
291 for (UInt_t j=0; j<fNVars; j++) {
292 trainDataX[j + i*fNVars] = e->GetValue(j);
293 }
294 // Fill targets
295 // NOTE: For classification, convert class number in one-hot vector,
296 // e.g., 1 -> [0, 1] or 0 -> [1, 0] for binary classification
298 for (UInt_t j=0; j<fNOutputs; j++) {
299 trainDataY[j + i*fNOutputs] = 0;
300 }
301 trainDataY[e->GetClass() + i*fNOutputs] = 1;
302 }
303 else if (GetAnalysisType() == Types::kRegression) {
304 for (UInt_t j=0; j<fNOutputs; j++) {
305 trainDataY[j + i*fNOutputs] = e->GetTarget(j);
306 }
307 }
308 else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
309 // Fill weights
310 // NOTE: If no weight branch is given, this defaults to ones for all events
311 trainDataWeights[i] = e->GetWeight();
312 }
313
314 npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNVars};
315 npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNOutputs};
316 npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};
317 PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (void*)trainDataX);
318 PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (void*)trainDataY);
319 PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (void*)trainDataWeights);
320 PyDict_SetItemString(fLocalNS, "trainX", (PyObject*)pTrainDataX);
321 PyDict_SetItemString(fLocalNS, "trainY", (PyObject*)pTrainDataY);
322 PyDict_SetItemString(fLocalNS, "trainWeights", (PyObject*)pTrainDataWeights);
323
324 /*
325 * Load validation data to numpy array
326 */
327
328 // NOTE: from TMVA, we get the validation data as a subset of all the training data
329 // we will not use test data for validation. They will be used for the real testing
330
331
332 float* valDataX = new float[nValEvents*fNVars];
333 float* valDataY = new float[nValEvents*fNOutputs];
334 float* valDataWeights = new float[nValEvents];
335 //validation events follows the trainig one in the TMVA training vector
336 for (UInt_t i=0; i< nValEvents ; i++) {
337 UInt_t ievt = nTrainingEvents + i; // TMVA event index
338 const TMVA::Event* e = GetTrainingEvent(ievt);
339 // Fill variables
340 for (UInt_t j=0; j<fNVars; j++) {
341 valDataX[j + i*fNVars] = e->GetValue(j);
342 }
343 // Fill targets
345 for (UInt_t j=0; j<fNOutputs; j++) {
346 valDataY[j + i*fNOutputs] = 0;
347 }
348 valDataY[e->GetClass() + i*fNOutputs] = 1;
349 }
350 else if (GetAnalysisType() == Types::kRegression) {
351 for (UInt_t j=0; j<fNOutputs; j++) {
352 valDataY[j + i*fNOutputs] = e->GetTarget(j);
353 }
354 }
355 else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
356 // Fill weights
357 valDataWeights[i] = e->GetWeight();
358 }
359
360 npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)fNVars};
361 npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)fNOutputs};
362 npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};
363 PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (void*)valDataX);
364 PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (void*)valDataY);
365 PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (void*)valDataWeights);
366 PyDict_SetItemString(fLocalNS, "valX", (PyObject*)pValDataX);
367 PyDict_SetItemString(fLocalNS, "valY", (PyObject*)pValDataY);
368 PyDict_SetItemString(fLocalNS, "valWeights", (PyObject*)pValDataWeights);
369
370 /*
371 * Train Keras model
372 */
373
374 // Setup parameters
375
376 PyObject* pBatchSize = PyLong_FromLong(fBatchSize);
377 PyObject* pNumEpochs = PyLong_FromLong(fNumEpochs);
378 PyObject* pVerbose = PyLong_FromLong(fVerbose);
379 PyDict_SetItemString(fLocalNS, "batchSize", pBatchSize);
380 PyDict_SetItemString(fLocalNS, "numEpochs", pNumEpochs);
381 PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
382
383 // Setup training callbacks
384 PyRunString("callbacks = []");
385
386 // Callback: Save only weights with smallest validation loss
387 if (fSaveBestOnly) {
388 PyRunString("callbacks.append(keras.callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
389 Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;
390 }
391
392 // Callback: Stop training early if no improvement in validation loss is observed
393 if (fTriesEarlyStopping>=0) {
394 TString tries;
395 tries.Form("%i", fTriesEarlyStopping);
396 PyRunString("callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
397 Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " number of epochs with no improvement of validation loss" << Endl;
398 }
399
400 // Callback: Learning rate scheduler
401 if (fLearningRateSchedule!="") {
402 // Setup a python dictionary with the desired learning rate steps
403 PyRunString("strScheduleSteps = '"+fLearningRateSchedule+"'\n"
404 "schedulerSteps = {}\n"
405 "for c in strScheduleSteps.split(';'):\n"
406 " x = c.split(',')\n"
407 " schedulerSteps[int(x[0])] = float(x[1])\n",
408 "Failed to setup steps for scheduler function from string: "+fLearningRateSchedule,
409 Py_file_input);
410 // Set scheduler function as piecewise function with given steps
411 PyRunString("def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n"
412 " if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n"
413 " else: return float(model.optimizer.lr.get_value())\n",
414 "Failed to setup scheduler function with string: "+fLearningRateSchedule,
415 Py_file_input);
416 // Setup callback
417 PyRunString("callbacks.append(keras.callbacks.LearningRateScheduler(schedule))",
418 "Failed to setup training callback: LearningRateSchedule");
419 Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;
420 }
421
422 // Callback: TensorBoard
423 if (fTensorBoard != "") {
424 TString logdir = TString("'") + fTensorBoard + TString("'");
426 "callbacks.append(keras.callbacks.TensorBoard(log_dir=" + logdir +
427 ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
428 "Failed to setup training callback: TensorBoard");
429 Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;
430 }
431
432 // Train model
433 PyRunString("history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
434 "Failed to train model");
435
436 /*
437 * Store trained model to file (only if option 'SaveBestOnly' is NOT activated,
438 * because we do not want to override the best model checkpoint)
439 */
440
441 if (!fSaveBestOnly) {
442 PyRunString("model.save('"+fFilenameTrainedModel+"', overwrite=True)",
443 "Failed to save trained model: "+fFilenameTrainedModel);
444 Log() << kINFO << "Trained model written to file: " << fFilenameTrainedModel << Endl;
445 }
446
447 /*
448 * Clean-up
449 */
450
451 delete[] trainDataX;
452 delete[] trainDataY;
453 delete[] trainDataWeights;
454 delete[] valDataX;
455 delete[] valDataY;
456 delete[] valDataWeights;
457}
458
461}
462
464 // Cannot determine error
465 NoErrorCalc(errLower, errUpper);
466
467 // Check whether the model is setup
468 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
469 if (!fModelIsSetup) {
470 // Setup the trained model
471 SetupKerasModel(true);
472 }
473
474 // Get signal probability (called mvaValue here)
475 const TMVA::Event* e = GetEvent();
476 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
477 PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
478 "Failed to get predictions");
479
481}
482
483std::vector<Double_t> MethodPyKeras::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) {
484 // Check whether the model is setup
485 // NOTE: Unfortunately this is needed because during evaluation ProcessOptions is not called again
486 if (!fModelIsSetup) {
487 // Setup the trained model
488 SetupKerasModel(true);
489 }
490
491 // Load data to numpy array
492 Long64_t nEvents = Data()->GetNEvents();
493 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
494 if (firstEvt < 0) firstEvt = 0;
495 nEvents = lastEvt-firstEvt;
496
497 // use timer
498 Timer timer( nEvents, GetName(), kTRUE );
499
500 if (logProgress)
501 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
502 << "Evaluation of " << GetMethodName() << " on "
503 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
504 << " sample (" << nEvents << " events)" << Endl;
505
506 float* data = new float[nEvents*fNVars];
507 for (UInt_t i=0; i<nEvents; i++) {
508 Data()->SetCurrentEvent(i);
509 const TMVA::Event *e = GetEvent();
510 for (UInt_t j=0; j<fNVars; j++) {
511 data[j + i*fNVars] = e->GetValue(j);
512 }
513 }
514
515 npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)fNVars};
516 PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (void*)data);
517 if (pDataMvaValues==0) Log() << "Failed to load data to Python array" << Endl;
518
519 // Get prediction for all events
520 PyObject* pModel = PyDict_GetItemString(fLocalNS, "model");
521 if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;
522 PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallMethod(pModel, (char*)"predict", (char*)"O", pDataMvaValues);
523 if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;
524 delete[] data;
525
526 // Load predictions to double vector
527 // NOTE: The signal probability is given at the output
528 std::vector<double> mvaValues(nEvents);
529 float* predictionsData = (float*) PyArray_DATA(pPredictions);
530 for (UInt_t i=0; i<nEvents; i++) {
531 mvaValues[i] = (double) predictionsData[i*fNOutputs + TMVA::Types::kSignal];
532 }
533
534 if (logProgress) {
535 Log() << kINFO
536 << "Elapsed time for evaluation of " << nEvents << " events: "
537 << timer.GetElapsedTime() << " " << Endl;
538 }
539
540
541 return mvaValues;
542}
543
544std::vector<Float_t>& MethodPyKeras::GetRegressionValues() {
545 // Check whether the model is setup
546 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
547 if (!fModelIsSetup){
548 // Setup the model and load weights
549 SetupKerasModel(true);
550 }
551
552 // Get regression values
553 const TMVA::Event* e = GetEvent();
554 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
555 PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
556 "Failed to get predictions");
557
558 // Use inverse transformation of targets to get final regression values
559 Event * eTrans = new Event(*e);
560 for (UInt_t i=0; i<fNOutputs; ++i) {
561 eTrans->SetTarget(i,fOutput[i]);
562 }
563
564 const Event* eTrans2 = GetTransformationHandler().InverseTransform(eTrans);
565 for (UInt_t i=0; i<fNOutputs; ++i) {
566 fOutput[i] = eTrans2->GetTarget(i);
567 }
568
569 return fOutput;
570}
571
572std::vector<Float_t>& MethodPyKeras::GetMulticlassValues() {
573 // Check whether the model is setup
574 // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
575 if (!fModelIsSetup){
576 // Setup the model and load weights
577 SetupKerasModel(true);
578 }
579
580 // Get class probabilites
581 const TMVA::Event* e = GetEvent();
582 for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
583 PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
584 "Failed to get predictions");
585
586 return fOutput;
587}
588
590}
591
593// typical length of text line:
594// "|--------------------------------------------------------------|"
595 Log() << Endl;
596 Log() << "Keras is a high-level API for the Theano and Tensorflow packages." << Endl;
597 Log() << "This method wraps the training and predictions steps of the Keras" << Endl;
598 Log() << "Python package for TMVA, so that dataloading, preprocessing and" << Endl;
599 Log() << "evaluation can be done within the TMVA system. To use this Keras" << Endl;
600 Log() << "interface, you have to generate a model with Keras first. Then," << Endl;
601 Log() << "this model can be loaded and trained in TMVA." << Endl;
602 Log() << Endl;
603}
604
606 // get the keras backend
607 // check first if using tensorflow backend
608 PyRunString("keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
609 PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
610 if (keras_backend != nullptr && keras_backend == Py_True)
611 return kTensorFlow;
612
613 PyRunString("keras_backend_is_set = keras.backend.backend() == \"theano\"");
614 keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
615 if (keras_backend != nullptr && keras_backend == Py_True)
616 return kTheano;
617
618 PyRunString("keras_backend_is_set = keras.backend.backend() == \"cntk\"");
619 keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
620 if (keras_backend != nullptr && keras_backend == Py_True)
621 return kCNTK;
622
623 return kUndefined;
624}
625
627 // get the keras backend name
629 if (type == kTensorFlow) return "TensorFlow";
630 if (type == kTheano) return "Theano";
631 if (type == kCNTK) return "CNTK";
632 return "Undefined";
633}
#define REGISTER_METHOD(CLASS)
for example
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
long long Long64_t
Definition: RtypesCore.h:69
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
int type
Definition: TGX11.cxx:120
_object PyObject
Definition: TPyArg.h:20
char * Form(const char *fmt,...)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
MsgLogger & Log() const
Definition: Configurable.h:122
Class that contains all the data information.
Definition: DataSetInfo.h:60
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
UInt_t GetNTargets() const
Definition: DataSetInfo.h:111
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:205
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:99
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:103
const char * GetName() const
Definition: MethodBase.h:325
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:428
const TString & GetWeightFileDir() const
Definition: MethodBase.h:481
const TString & GetMethodName() const
Definition: MethodBase.h:322
const Event * GetEvent() const
Definition: MethodBase.h:740
DataSetInfo & DataInfo() const
Definition: MethodBase.h:401
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
Definition: MethodBase.h:336
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:385
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:841
DataSet * Data() const
Definition: MethodBase.h:400
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: MethodBase.h:760
void GetHelpMessage() const
std::vector< float > fOutput
Definition: MethodPyKeras.h:96
virtual void TestClassification()
initialization
EBackendType
enumeration defining the used Keras backend
Definition: MethodPyKeras.h:73
void SetupKerasModel(Bool_t loadTrainedModel)
std::vector< Float_t > & GetMulticlassValues()
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper)
std::vector< Float_t > & GetRegressionValues()
TString fNumValidationString
Definition: MethodPyKeras.h:91
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
TString GetKerasBackendName()
MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
TString fLearningRateSchedule
Definition: MethodPyKeras.h:89
EBackendType GetKerasBackend()
Get the Keras backend (can be: TensorFlow, Theano or CNTK)
TString fFilenameTrainedModel
Definition: MethodPyKeras.h:99
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
get all the MVA values for the events of the current Data type
static int PyIsInitialized()
Check Python interpreter initialization status.
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=Py_single_input)
Execute Python code from string.
PyObject * fLocalNS
Definition: PyMethodBase.h:143
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:134
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
Singleton class for Global types used by TMVA.
Definition: Types.h:73
@ kSignal
Definition: Types.h:136
EAnalysisType
Definition: Types.h:127
@ kMulticlass
Definition: Types.h:130
@ kClassification
Definition: Types.h:128
@ kRegression
Definition: Types.h:129
@ kTraining
Definition: Types.h:144
An array of TObjects.
Definition: TObjArray.h:37
Int_t GetEntries() const
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:522
TObject * At(Int_t idx) const
Definition: TObjArray.h:166
virtual const char * GetName() const
Returns name of object.
Definition: TObject.cxx:357
Basic string class.
Definition: TString.h:131
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
@ kTrailing
Definition: TString.h:262
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2197
Bool_t IsNull() const
Definition: TString.h:402
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2289
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:748