doc/v620/MethodPyKeras_8cxx_source.html

// @(#)root/tmva/pymva $Id$

// Author: Stefan Wunsch, 2016


#include <Python.h>

#include "TMVA/MethodPyKeras.h"


#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION

#include <numpy/arrayobject.h>


#include "TMVA/Types.h"

#include "TMVA/Config.h"

#include "TMVA/ClassifierFactory.h"

#include "TMVA/Results.h"

#include "TMVA/TransformationHandler.h"

#include "TMVA/VariableTransformBase.h"

#include "TMVA/Tools.h"

#include "TMVA/Timer.h"


using namespace TMVA;


namespace TMVA {

namespace Internal {

class PyGILRAII {

   PyGILState_STATE m_GILState;


public:

   PyGILRAII() : m_GILState(PyGILState_Ensure()) {}

   ~PyGILRAII() { PyGILState_Release(m_GILState); }

};

} // namespace Internal

} // namespace TMVA


REGISTER_METHOD(PyKeras)


ClassImp(MethodPyKeras);


MethodPyKeras::MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption)

   : PyMethodBase(jobName, Types::kPyKeras, methodTitle, dsi, theOption) {

   fNumEpochs = 10;

   fBatchSize = 100;

   fVerbose = 1;

   fContinueTraining = false;

   fSaveBestOnly = true;

   fTriesEarlyStopping = -1;

   fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler

   fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)

   fTensorBoard = "";          // empty string deactivates TensorBoard callback

}


MethodPyKeras::MethodPyKeras(DataSetInfo &theData, const TString &theWeightFile)

    : PyMethodBase(Types::kPyKeras, theData, theWeightFile) {

   fNumEpochs = 10;

   fNumThreads = 0;

   fBatchSize = 100;

   fVerbose = 1;

   fContinueTraining = false;

   fSaveBestOnly = true;

   fTriesEarlyStopping = -1;

   fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler

   fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)

   fTensorBoard = "";          // empty string deactivates TensorBoard callback

}


MethodPyKeras::~MethodPyKeras() {

}


Bool_t MethodPyKeras::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) {

   if (type == Types::kRegression) return kTRUE;

   if (type == Types::kClassification && numberClasses == 2) return kTRUE;

   if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;

   return kFALSE;

}


///////////////////////////////////////////////////////////////////////////////


void MethodPyKeras::DeclareOptions() {

   DeclareOptionRef(fFilenameModel, "FilenameModel", "Filename of the initial Keras model");

   DeclareOptionRef(fFilenameTrainedModel, "FilenameTrainedModel", "Filename of the trained output Keras model");

   DeclareOptionRef(fBatchSize, "BatchSize", "Training batch size");

   DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");

   DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)");

   DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth");

   DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training");

   DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");

   DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");

   DeclareOptionRef(fTriesEarlyStopping, "TriesEarlyStopping", "Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");

   DeclareOptionRef(fLearningRateSchedule, "LearningRateSchedule", "Set new learning rate during training at specific epochs, e.g., \"50,0.01;70,0.005\"");

   DeclareOptionRef(fTensorBoard, "TensorBoard",

                    "Write a log during training to visualize and monitor the training performance with TensorBoard");

   DeclareOptionRef(fTensorBoard, "TensorBoard",

                    "Write a log during training to visualize and monitor the training performance with TensorBoard");


   DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "

                    "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "

                    "Specify as 100 to use exactly 100 events. (Default: 20%)");


}


////////////////////////////////////////////////////////////////////////////////

/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and

/// 100 etc.

///    - 20% and 0.2 selects 20% of the training set as validation data.

///    - 100 selects 100 events as the validation data.

///

/// @return number of samples in validation set

///

UInt_t TMVA::MethodPyKeras::GetNumValidationSamples()

{

   Int_t nValidationSamples = 0;

   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();


   // Parsing + Validation

   // --------------------

   if (fNumValidationString.EndsWith("%")) {

      // Relative spec. format 20%

      TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));


      if (intValStr.IsFloat()) {

         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;

         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;

      } else {

         Log() << kFATAL << "Cannot parse number \"" << fNumValidationString

               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;

      }

   } else if (fNumValidationString.IsFloat()) {

      Double_t valSizeAsDouble = fNumValidationString.Atof();


      if (valSizeAsDouble < 1.0) {

         // Relative spec. format 0.2

         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;

      } else {

         // Absolute spec format 100 or 100.0

         nValidationSamples = valSizeAsDouble;

      }

   } else {

      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."

            << Endl;

   }


   // Value validation

   // ----------------

   if (nValidationSamples < 0) {

      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;

   }


   if (nValidationSamples == 0) {

      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;

   }


   if (nValidationSamples >= (Int_t)trainingSetSize) {

      Log() << kFATAL << "Validation size \"" << fNumValidationString

            << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;

   }


   return nValidationSamples;

}


void MethodPyKeras::ProcessOptions() {

   // Set default filename for trained model if option is not used

   if (fFilenameTrainedModel.IsNull()) {

      fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5";

   }


   // set here some specific options for Tensorflow backend

   //  -  when using tensorflow gpu set option to allow memory growth to avoid allocating all memory

   //  -  set up number of threads for CPU if NumThreads option was specified


   // check first if using tensorflow backend

   if (GetKerasBackend() == kTensorFlow) {

      Log() << kINFO << "Using TensorFlow backend - setting special configuration options "  << Endl;

      PyRunString("import tensorflow as tf");

      PyRunString("from keras.backend import tensorflow_backend as K");


      // check tensorflow version

      PyRunString("tf_major_version = int(tf.__version__.split('.')[0])");

      //PyRunString("print(tf.__version__,'major is ',tf_major_version)");

      PyObject *pyTfVersion = PyDict_GetItemString(fLocalNS, "tf_major_version");

      int tfVersion = PyLong_AsLong(pyTfVersion);

      Log() << kINFO << "Using Tensorflow version " << tfVersion << Endl;


      // use different naming in tf2 for ConfigProto and Session

      TString configProto = (tfVersion >= 2) ? "tf.compat.v1.ConfigProto" : "tf.ConfigProto";

      TString session = (tfVersion >= 2) ? "tf.compat.v1.Session" : "tf.Session";


      // in case specify number of threads

      int num_threads = fNumThreads;

      if (num_threads > 0) {

         Log() << kINFO << "Setting the CPU number of threads =  "  << num_threads << Endl;


         PyRunString(TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",

                                        configProto.Data(), num_threads,num_threads));

      }

      else

         PyRunString(TString::Format("session_conf = %s()",configProto.Data()));


      // applying GPU options such as allow_growth=True to avoid allocating all memory on GPU

      // that prevents running later TMVA-GPU

      // Also new Nvidia RTX cards (e.g. RTX 2070)  require this option

      if (!fGpuOptions.IsNull() ) {

         TObjArray * optlist = fGpuOptions.Tokenize(",");

         for (int item = 0; item < optlist->GetEntries(); ++item) {

            Log() << kINFO << "Applying GPU option:  gpu_options." << optlist->At(item)->GetName() << Endl;

            PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName()));

         }

      }

      PyRunString(TString::Format("sess = %s(config=session_conf)", session.Data()));


      if (tfVersion < 2) {

         PyRunString("K.set_session(sess)");

      } else {

         PyRunString("tf.compat.v1.keras.backend.set_session(sess)");

      }

   }

   else {

      if (fNumThreads > 0)

         Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as  backend"  << Endl;

      if (!fGpuOptions.IsNull() ) {

         Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions << " when not using tensorflow as  backend"  << Endl;

      }

   }


   // Setup model, either the initial model from `fFilenameModel` or

   // the trained model from `fFilenameTrainedModel`

   if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;

   SetupKerasModel(fContinueTraining);

}


void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {

   /*

    * Load Keras model from file

    */


   // Load initial model or already trained model

   TString filenameLoadModel;

   if (loadTrainedModel) {

      filenameLoadModel = fFilenameTrainedModel;

   }

   else {

      filenameLoadModel = fFilenameModel;

   }

   PyRunString("model = keras.models.load_model('"+filenameLoadModel+"')",

               "Failed to load Keras model from file: "+filenameLoadModel);

   Log() << kINFO << "Load model from file: " << filenameLoadModel << Endl;


   /*

    * Init variables and weights

    */


   // Get variables, classes and target numbers

   fNVars = GetNVariables();

   if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) fNOutputs = DataInfo().GetNClasses();

   else if (GetAnalysisType() == Types::kRegression) fNOutputs = DataInfo().GetNTargets();

   else Log() << kFATAL << "Selected analysis type is not implemented" << Endl;


   // Init evaluation (needed for getMvaValue)

   fVals = new float[fNVars]; // holds values used for classification and regression

   npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)fNVars};

   PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (void*)fVals);

   PyDict_SetItemString(fLocalNS, "vals", (PyObject*)pVals);


   fOutput.resize(fNOutputs); // holds classification probabilities or regression output

   npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)fNOutputs};

   PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (void*)&fOutput[0]);

   PyDict_SetItemString(fLocalNS, "output", (PyObject*)pOutput);


   // Mark the model as setup

   fModelIsSetup = true;

}


void MethodPyKeras::Init() {


   TMVA::Internal::PyGILRAII raii;


   if (!PyIsInitialized()) {

      Log() << kFATAL << "Python is not initialized" << Endl;

   }

   _import_array(); // required to use numpy arrays


   // Import Keras

   // NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks

   PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");

   PyRunString("import keras", "Import Keras failed");


   // Set flag that model is not setup

   fModelIsSetup = false;

}


void MethodPyKeras::Train() {

   if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;


   /*

    * Load training data to numpy array

    */


   UInt_t nAllEvents = Data()->GetNTrainingEvents();

   UInt_t nValEvents = GetNumValidationSamples();

   UInt_t nTrainingEvents = nAllEvents - nValEvents;


   Log() << kINFO << "Split TMVA training data in " << nTrainingEvents << " training events and "

         << nValEvents << " validation events" << Endl;


   float* trainDataX = new float[nTrainingEvents*fNVars];

   float* trainDataY = new float[nTrainingEvents*fNOutputs];

   float* trainDataWeights = new float[nTrainingEvents];

   for (UInt_t i=0; i<nTrainingEvents; i++) {

      const TMVA::Event* e = GetTrainingEvent(i);

      // Fill variables

      for (UInt_t j=0; j<fNVars; j++) {

         trainDataX[j + i*fNVars] = e->GetValue(j);

      }

      // Fill targets

      // NOTE: For classification, convert class number in one-hot vector,

      // e.g., 1 -> [0, 1] or 0 -> [1, 0] for binary classification

      if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {

         for (UInt_t j=0; j<fNOutputs; j++) {

            trainDataY[j + i*fNOutputs] = 0;

         }

         trainDataY[e->GetClass() + i*fNOutputs] = 1;

      }

      else if (GetAnalysisType() == Types::kRegression) {

         for (UInt_t j=0; j<fNOutputs; j++) {

            trainDataY[j + i*fNOutputs] = e->GetTarget(j);

         }

      }

      else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;

      // Fill weights

      // NOTE: If no weight branch is given, this defaults to ones for all events

      trainDataWeights[i] = e->GetWeight();

   }


   npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNVars};

   npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNOutputs};

   npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};

   PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (void*)trainDataX);

   PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (void*)trainDataY);

   PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (void*)trainDataWeights);

   PyDict_SetItemString(fLocalNS, "trainX", (PyObject*)pTrainDataX);

   PyDict_SetItemString(fLocalNS, "trainY", (PyObject*)pTrainDataY);

   PyDict_SetItemString(fLocalNS, "trainWeights", (PyObject*)pTrainDataWeights);


   /*

    * Load validation data to numpy array

    */


   // NOTE: from TMVA,  we get the validation data as a subset of all the training data

   // we will not use test data for validation. They will be used for the real testing


   float* valDataX = new float[nValEvents*fNVars];

   float* valDataY = new float[nValEvents*fNOutputs];

   float* valDataWeights = new float[nValEvents];

   //validation events follows the trainig one in the TMVA training vector

   for (UInt_t i=0; i< nValEvents ; i++) {

      UInt_t ievt = nTrainingEvents + i; // TMVA event index

      const TMVA::Event* e = GetTrainingEvent(ievt);

      // Fill variables

      for (UInt_t j=0; j<fNVars; j++) {

         valDataX[j + i*fNVars] = e->GetValue(j);

      }

      // Fill targets

      if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {

         for (UInt_t j=0; j<fNOutputs; j++) {

            valDataY[j + i*fNOutputs] = 0;

         }

         valDataY[e->GetClass() + i*fNOutputs] = 1;

      }

      else if (GetAnalysisType() == Types::kRegression) {

         for (UInt_t j=0; j<fNOutputs; j++) {

            valDataY[j + i*fNOutputs] = e->GetTarget(j);

         }

      }

      else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;

      // Fill weights

      valDataWeights[i] = e->GetWeight();

   }


   npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)fNVars};

   npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)fNOutputs};

   npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};

   PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (void*)valDataX);

   PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (void*)valDataY);

   PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (void*)valDataWeights);

   PyDict_SetItemString(fLocalNS, "valX", (PyObject*)pValDataX);

   PyDict_SetItemString(fLocalNS, "valY", (PyObject*)pValDataY);

   PyDict_SetItemString(fLocalNS, "valWeights", (PyObject*)pValDataWeights);


   /*

    * Train Keras model

    */

   Log() << kINFO << "Training Model Summary" << Endl;

   PyRunString("model.summary()");


   // Setup parameters


   PyObject* pBatchSize = PyLong_FromLong(fBatchSize);

   PyObject* pNumEpochs = PyLong_FromLong(fNumEpochs);

   PyObject* pVerbose = PyLong_FromLong(fVerbose);

   PyDict_SetItemString(fLocalNS, "batchSize", pBatchSize);

   PyDict_SetItemString(fLocalNS, "numEpochs", pNumEpochs);

   PyDict_SetItemString(fLocalNS, "verbose", pVerbose);


   // Setup training callbacks

   PyRunString("callbacks = []");


   // Callback: Save only weights with smallest validation loss

   if (fSaveBestOnly) {

      PyRunString("callbacks.append(keras.callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");

      Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;

   }


   // Callback: Stop training early if no improvement in validation loss is observed

   if (fTriesEarlyStopping>=0) {

      TString tries;

      tries.Form("%i", fTriesEarlyStopping);

      PyRunString("callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");

      Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " number of epochs with no improvement of validation loss" << Endl;

   }


   // Callback: Learning rate scheduler

   if (fLearningRateSchedule!="") {

      // Setup a python dictionary with the desired learning rate steps

      PyRunString("strScheduleSteps = '"+fLearningRateSchedule+"'\n"

                  "schedulerSteps = {}\n"

                  "for c in strScheduleSteps.split(';'):\n"

                  "    x = c.split(',')\n"

                  "    schedulerSteps[int(x[0])] = float(x[1])\n",

                  "Failed to setup steps for scheduler function from string: "+fLearningRateSchedule,

                   Py_file_input);

      // Set scheduler function as piecewise function with given steps

      PyRunString("def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n"

                  "    if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n"

                  "    else: return float(model.optimizer.lr.get_value())\n",

                  "Failed to setup scheduler function with string: "+fLearningRateSchedule,

                  Py_file_input);

      // Setup callback

      PyRunString("callbacks.append(keras.callbacks.LearningRateScheduler(schedule))",

                  "Failed to setup training callback: LearningRateSchedule");

      Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;

   }


   // Callback: TensorBoard

   if (fTensorBoard != "") {

      TString logdir = TString("'") + fTensorBoard + TString("'");

      PyRunString(

         "callbacks.append(keras.callbacks.TensorBoard(log_dir=" + logdir +

            ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",

         "Failed to setup training callback: TensorBoard");

      Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;

   }


   // Train model

   PyRunString("history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",

               "Failed to train model");


   std::vector<float> fHistory; // Hold training history  (val_acc or loss etc)

   fHistory.resize(fNumEpochs); // holds training loss or accuracy output

   npy_intp dimsHistory[1] = { (npy_intp)fNumEpochs};

   PyArrayObject* pHistory = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsHistory, NPY_FLOAT, (void*)&fHistory[0]);

   PyDict_SetItemString(fLocalNS, "HistoryOutput", (PyObject*)pHistory);


   // Store training history data

   Int_t iHis=0;

   PyRunString("number_of_keys=len(history.history.keys())");

   PyObject* PyNkeys=PyDict_GetItemString(fLocalNS, "number_of_keys");

   int nkeys=PyLong_AsLong(PyNkeys);

   for (iHis=0; iHis<nkeys; iHis++) {


      PyRunString(TString::Format("copy_string=str(list(history.history.keys())[%d])",iHis));

      //PyRunString("print (copy_string)");

      PyObject* stra=PyDict_GetItemString(fLocalNS, "copy_string");

      if(!stra) break;

#if PY_MAJOR_VERSION < 3   // for Python2

      const char *stra_name = PyBytes_AsString(stra);

      // need to add string delimiter for Python2

      TString sname = TString::Format("'%s'",stra_name);

      const char * name = sname.Data();

#else   // for Python3

      PyObject* repr = PyObject_Repr(stra);

      PyObject* str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~");

      const char *name = PyBytes_AsString(str);

#endif


      Log() << kINFO << "Getting training history for item:" << iHis << " name = " << name << Endl;

      PyRunString(TString::Format("for i,p in enumerate(history.history[%s]):\n   HistoryOutput[i]=p\n",name),

                  TString::Format("Failed to get %s from training history",name));

      for (size_t i=0; i<fHistory.size(); i++)

         fTrainHistory.AddValue(name,i+1,fHistory[i]);


   }

//#endif


   /*

    * Store trained model to file (only if option 'SaveBestOnly' is NOT activated,

    * because we do not want to override the best model checkpoint)

    */


   if (!fSaveBestOnly) {

      PyRunString("model.save('"+fFilenameTrainedModel+"', overwrite=True)",

                  "Failed to save trained model: "+fFilenameTrainedModel);

      Log() << kINFO << "Trained model written to file: " << fFilenameTrainedModel << Endl;

   }


   /*

    * Clean-up

    */


   delete[] trainDataX;

   delete[] trainDataY;

   delete[] trainDataWeights;

   delete[] valDataX;

   delete[] valDataY;

   delete[] valDataWeights;

}


void MethodPyKeras::TestClassification() {

    MethodBase::TestClassification();

}


Double_t MethodPyKeras::GetMvaValue(Double_t *errLower, Double_t *errUpper) {

   // Cannot determine error

   NoErrorCalc(errLower, errUpper);


   // Check whether the model is setup

   // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again

   if (!fModelIsSetup) {

      // Setup the trained model

      SetupKerasModel(true);

   }


   // Get signal probability (called mvaValue here)

   const TMVA::Event* e = GetEvent();

   for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);

   PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",

               "Failed to get predictions");


   return fOutput[TMVA::Types::kSignal];

}


std::vector<Double_t> MethodPyKeras::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) {

   // Check whether the model is setup

   // NOTE: Unfortunately this is needed because during evaluation ProcessOptions is not called again

   if (!fModelIsSetup) {

      // Setup the trained model

      SetupKerasModel(true);

   }


   // Load data to numpy array

   Long64_t nEvents = Data()->GetNEvents();

   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;

   if (firstEvt < 0) firstEvt = 0;

   nEvents = lastEvt-firstEvt;


   // use timer

   Timer timer( nEvents, GetName(), kTRUE );


   if (logProgress)

      Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())

            << "Evaluation of " << GetMethodName() << " on "

            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")

            << " sample (" << nEvents << " events)" << Endl;


   float* data = new float[nEvents*fNVars];

   for (UInt_t i=0; i<nEvents; i++) {

      Data()->SetCurrentEvent(i);

      const TMVA::Event *e = GetEvent();

      for (UInt_t j=0; j<fNVars; j++) {

         data[j + i*fNVars] = e->GetValue(j);

      }

   }


   npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)fNVars};

   PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (void*)data);

   if (pDataMvaValues==0) Log() << "Failed to load data to Python array" << Endl;


   // Get prediction for all events

   PyObject* pModel = PyDict_GetItemString(fLocalNS, "model");

   if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;

   PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallMethod(pModel, (char*)"predict", (char*)"O", pDataMvaValues);

   if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;

   delete[] data;


   // Load predictions to double vector

   // NOTE: The signal probability is given at the output

   std::vector<double> mvaValues(nEvents);

   float* predictionsData = (float*) PyArray_DATA(pPredictions);

   for (UInt_t i=0; i<nEvents; i++) {

      mvaValues[i] = (double) predictionsData[i*fNOutputs + TMVA::Types::kSignal];

   }


   if (logProgress) {

      Log() << kINFO

            << "Elapsed time for evaluation of " << nEvents <<  " events: "

            << timer.GetElapsedTime() << "       " << Endl;

   }


   return mvaValues;

}


std::vector<Float_t>& MethodPyKeras::GetRegressionValues() {

   // Check whether the model is setup

   // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again

   if (!fModelIsSetup){

      // Setup the model and load weights

      SetupKerasModel(true);

   }


   // Get regression values

   const TMVA::Event* e = GetEvent();

   for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);

   PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",

               "Failed to get predictions");


   // Use inverse transformation of targets to get final regression values

   Event * eTrans = new Event(*e);

   for (UInt_t i=0; i<fNOutputs; ++i) {

      eTrans->SetTarget(i,fOutput[i]);

   }


   const Event* eTrans2 = GetTransformationHandler().InverseTransform(eTrans);

   for (UInt_t i=0; i<fNOutputs; ++i) {

      fOutput[i] = eTrans2->GetTarget(i);

   }


   return fOutput;

}


std::vector<Float_t>& MethodPyKeras::GetMulticlassValues() {

   // Check whether the model is setup

   // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again

   if (!fModelIsSetup){

      // Setup the model and load weights

      SetupKerasModel(true);

   }


   // Get class probabilites

   const TMVA::Event* e = GetEvent();

   for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);

   PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",

               "Failed to get predictions");


   return fOutput;

}


void MethodPyKeras::ReadModelFromFile() {

}


void MethodPyKeras::GetHelpMessage() const {

// typical length of text line:

//          "|--------------------------------------------------------------|"

   Log() << Endl;

   Log() << "Keras is a high-level API for the Theano and Tensorflow packages." << Endl;

   Log() << "This method wraps the training and predictions steps of the Keras" << Endl;

   Log() << "Python package for TMVA, so that dataloading, preprocessing and" << Endl;

   Log() << "evaluation can be done within the TMVA system. To use this Keras" << Endl;

   Log() << "interface, you have to generate a model with Keras first. Then," << Endl;

   Log() << "this model can be loaded and trained in TMVA." << Endl;

   Log() << Endl;

}


MethodPyKeras::EBackendType MethodPyKeras::GetKerasBackend()  {

   // get the keras backend

   // check first if using tensorflow backend

   PyRunString("keras_backend_is_set =  keras.backend.backend() == \"tensorflow\"");

   PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");

   if (keras_backend  != nullptr && keras_backend == Py_True)

      return kTensorFlow;


   PyRunString("keras_backend_is_set =  keras.backend.backend() == \"theano\"");

   keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");

   if (keras_backend != nullptr && keras_backend == Py_True)

      return kTheano;


   PyRunString("keras_backend_is_set =  keras.backend.backend() == \"cntk\"");

   keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");

   if (keras_backend != nullptr && keras_backend == Py_True)

      return kCNTK;


   return kUndefined;

}


TString MethodPyKeras::GetKerasBackendName()  {

   // get the keras backend name

   EBackendType type = GetKerasBackend();

   if (type == kTensorFlow) return "TensorFlow";

   if (type == kTheano) return "Theano";

   if (type == kCNTK) return "CNTK";

   return "Undefined";

}

ClassifierFactory.h

REGISTER_METHOD
#define REGISTER_METHOD(CLASS)
for example
Definition: ClassifierFactory.h:124

Config.h

MethodPyKeras.h

PyBytes_AsString
#define PyBytes_AsString
Definition: PyROOT.h:67

e
#define e(i)
Definition: RSha256.hxx:103

Results.h

Int_t
int Int_t
Definition: RtypesCore.h:41

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:88

Bool_t
bool Bool_t
Definition: RtypesCore.h:59

Double_t
double Double_t
Definition: RtypesCore.h:55

Long64_t
long long Long64_t
Definition: RtypesCore.h:69

kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:87

ClassImp
#define ClassImp(name)
Definition: Rtypes.h:365

name
char name[80]
Definition: TGX11.cxx:109

type
int type
Definition: TGX11.cxx:120

PyObject
_object PyObject
Definition: TPyArg.h:20

Form
char * Form(const char *fmt,...)

Timer.h

Tools.h

TransformationHandler.h

VariableTransformBase.h

TMVA::Configurable::DeclareOptionRef
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")

TMVA::Configurable::Log
MsgLogger & Log() const
Definition: Configurable.h:122

TMVA::DataSetInfo
Class that contains all the data information.
Definition: DataSetInfo.h:60

TMVA::DataSetInfo::GetNClasses
UInt_t GetNClasses() const
Definition: DataSetInfo.h:153

TMVA::DataSetInfo::GetNTargets
UInt_t GetNTargets() const
Definition: DataSetInfo.h:126

TMVA::DataSet::GetCurrentType
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:205

TMVA::DataSet::GetNEvents
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217

TMVA::DataSet::GetNTrainingEvents
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79

TMVA::DataSet::SetCurrentEvent
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:99

TMVA::Event
Definition: Event.h:51

TMVA::Event::SetTarget
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360

TMVA::Event::GetTarget
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102

TMVA::MethodBase::GetName
const char * GetName() const
Definition: MethodBase.h:333

TMVA::MethodBase::GetAnalysisType
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:437

TMVA::MethodBase::GetWeightFileDir
const TString & GetWeightFileDir() const
Definition: MethodBase.h:490

TMVA::MethodBase::GetMethodName
const TString & GetMethodName() const
Definition: MethodBase.h:330

TMVA::MethodBase::GetEvent
const Event * GetEvent() const
Definition: MethodBase.h:749

TMVA::MethodBase::DataInfo
DataSetInfo & DataInfo() const
Definition: MethodBase.h:409

TMVA::MethodBase::TestClassification
virtual void TestClassification()
initialization
Definition: MethodBase.cxx:1116

TMVA::MethodBase::GetNVariables
UInt_t GetNVariables() const
Definition: MethodBase.h:344

TMVA::MethodBase::GetTransformationHandler
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:393

TMVA::MethodBase::NoErrorCalc
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:841

TMVA::MethodBase::fTrainHistory
TrainingHistory fTrainHistory
Definition: MethodBase.h:425

TMVA::MethodBase::Data
DataSet * Data() const
Definition: MethodBase.h:408

TMVA::MethodBase::GetTrainingEvent
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: MethodBase.h:769

TMVA::MethodPyKeras
Definition: MethodPyKeras.h:33

TMVA::MethodPyKeras::fVerbose
Int_t fVerbose
Definition: MethodPyKeras.h:85

TMVA::MethodPyKeras::GetHelpMessage
void GetHelpMessage() const
Definition: MethodPyKeras.cxx:651

TMVA::MethodPyKeras::fNVars
UInt_t fNVars
Definition: MethodPyKeras.h:97

TMVA::MethodPyKeras::Init
void Init()
Definition: MethodPyKeras.cxx:272

TMVA::MethodPyKeras::fOutput
std::vector< float > fOutput
Definition: MethodPyKeras.h:96

TMVA::MethodPyKeras::fContinueTraining
Bool_t fContinueTraining
Definition: MethodPyKeras.h:86

TMVA::MethodPyKeras::fBatchSize
UInt_t fBatchSize
Definition: MethodPyKeras.h:82

TMVA::MethodPyKeras::TestClassification
virtual void TestClassification()
initialization
Definition: MethodPyKeras.cxx:518

TMVA::MethodPyKeras::ProcessOptions
void ProcessOptions()
Definition: MethodPyKeras.cxx:159

TMVA::MethodPyKeras::fTensorBoard
TString fTensorBoard
Definition: MethodPyKeras.h:90

TMVA::MethodPyKeras::fTriesEarlyStopping
Int_t fTriesEarlyStopping
Definition: MethodPyKeras.h:88

TMVA::MethodPyKeras::DeclareOptions
void DeclareOptions()
Definition: MethodPyKeras.cxx:76

TMVA::MethodPyKeras::EBackendType
EBackendType
enumeration defining the used Keras backend
Definition: MethodPyKeras.h:73

TMVA::MethodPyKeras::kTheano
@ kTheano
Definition: MethodPyKeras.h:73

TMVA::MethodPyKeras::kCNTK
@ kCNTK
Definition: MethodPyKeras.h:73

TMVA::MethodPyKeras::kTensorFlow
@ kTensorFlow
Definition: MethodPyKeras.h:73

TMVA::MethodPyKeras::kUndefined
@ kUndefined
Definition: MethodPyKeras.h:73

TMVA::MethodPyKeras::SetupKerasModel
void SetupKerasModel(Bool_t loadTrainedModel)
Definition: MethodPyKeras.cxx:229

TMVA::MethodPyKeras::fFilenameModel
TString fFilenameModel
Definition: MethodPyKeras.h:81

TMVA::MethodPyKeras::GetMulticlassValues
std::vector< Float_t > & GetMulticlassValues()
Definition: MethodPyKeras.cxx:631

TMVA::MethodPyKeras::GetNumValidationSamples
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
Definition: MethodPyKeras.cxx:108

TMVA::MethodPyKeras::Train
void Train()
Definition: MethodPyKeras.cxx:290

TMVA::MethodPyKeras::GetMvaValue
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper)
Definition: MethodPyKeras.cxx:522

TMVA::MethodPyKeras::GetRegressionValues
std::vector< Float_t > & GetRegressionValues()
Definition: MethodPyKeras.cxx:603

TMVA::MethodPyKeras::fVals
float * fVals
Definition: MethodPyKeras.h:95

TMVA::MethodPyKeras::fNumValidationString
TString fNumValidationString
Definition: MethodPyKeras.h:91

TMVA::MethodPyKeras::fModelIsSetup
bool fModelIsSetup
Definition: MethodPyKeras.h:94

TMVA::MethodPyKeras::~MethodPyKeras
~MethodPyKeras()
Definition: MethodPyKeras.cxx:64

TMVA::MethodPyKeras::ReadModelFromFile
void ReadModelFromFile()
Definition: MethodPyKeras.cxx:648

TMVA::MethodPyKeras::HasAnalysisType
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Definition: MethodPyKeras.cxx:67

TMVA::MethodPyKeras::fSaveBestOnly
Bool_t fSaveBestOnly
Definition: MethodPyKeras.h:87

TMVA::MethodPyKeras::GetKerasBackendName
TString GetKerasBackendName()
Definition: MethodPyKeras.cxx:685

TMVA::MethodPyKeras::MethodPyKeras
MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
Definition: MethodPyKeras.cxx:37

TMVA::MethodPyKeras::fGpuOptions
TString fGpuOptions
Definition: MethodPyKeras.h:92

TMVA::MethodPyKeras::fNumThreads
Int_t fNumThreads
Definition: MethodPyKeras.h:84

TMVA::MethodPyKeras::fLearningRateSchedule
TString fLearningRateSchedule
Definition: MethodPyKeras.h:89

TMVA::MethodPyKeras::fNOutputs
UInt_t fNOutputs
Definition: MethodPyKeras.h:98

TMVA::MethodPyKeras::GetKerasBackend
EBackendType GetKerasBackend()
Get the Keras backend (can be: TensorFlow, Theano or CNTK)
Definition: MethodPyKeras.cxx:664

TMVA::MethodPyKeras::fFilenameTrainedModel
TString fFilenameTrainedModel
Definition: MethodPyKeras.h:99

TMVA::MethodPyKeras::fNumEpochs
UInt_t fNumEpochs
Definition: MethodPyKeras.h:83

TMVA::MethodPyKeras::GetMvaValues
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
get all the MVA values for the events of the current Data type
Definition: MethodPyKeras.cxx:542

TMVA::PyMethodBase
Definition: PyMethodBase.h:55

TMVA::PyMethodBase::PyIsInitialized
static int PyIsInitialized()
Check Python interpreter initialization status.
Definition: PyMethodBase.cxx:248

TMVA::PyMethodBase::PyRunString
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=Py_single_input)
Execute Python code from string.
Definition: PyMethodBase.cxx:320

TMVA::PyMethodBase::fLocalNS
PyObject * fLocalNS
Definition: PyMethodBase.h:130

TMVA::Timer
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58

TMVA::Timer::GetElapsedTime
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:140

TMVA::TrainingHistory::AddValue
void AddValue(TString Property, Int_t stage, Double_t value)
Definition: TrainingHistory.cxx:45

TMVA::TransformationHandler::InverseTransform
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
Definition: TransformationHandler.cxx:167

TMVA::Types
Singleton class for Global types used by TMVA.
Definition: Types.h:73

TMVA::Types::kSignal
@ kSignal
Definition: Types.h:136

TMVA::Types::EAnalysisType
EAnalysisType
Definition: Types.h:127

TMVA::Types::kMulticlass
@ kMulticlass
Definition: Types.h:130

TMVA::Types::kClassification
@ kClassification
Definition: Types.h:128

TMVA::Types::kRegression
@ kRegression
Definition: Types.h:129

TMVA::Types::kTraining
@ kTraining
Definition: Types.h:144

TObjArray
An array of TObjects.
Definition: TObjArray.h:37

TObjArray::GetEntries
Int_t GetEntries() const
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:522

TObjArray::At
TObject * At(Int_t idx) const
Definition: TObjArray.h:166

TObject::GetName
virtual const char * GetName() const
Returns name of object.
Definition: TObject.cxx:357

TString
Basic string class.
Definition: TString.h:131

TString::IsFloat
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791

TString::Data
const char * Data() const
Definition: TString.h:364

TString::kTrailing
@ kTrailing
Definition: TString.h:262

TString::Tokenize
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2197

TString::IsNull
Bool_t IsNull() const
Definition: TString.h:402

TString::Format
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311

TString::Form
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2289

TMVA
create variable transformations
Definition: GeneticMinimizer.h:21

TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158

TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:750

Types.h