7#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 
    8#include <numpy/arrayobject.h> 
   91   DeclareOptionRef(
fTriesEarlyStopping, 
"TriesEarlyStopping", 
"Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
 
   94                    "Write a log during training to visualize and monitor the training performance with TensorBoard");
 
   97                    "Specify as 0.2 or 20% to use a fifth of the data set as validation set. " 
   98                    "Specify as 100 to use exactly 100 events. (Default: 20%)");
 
  100                    "Optional python code provided by the user to be executed before loading the Keras model");
 
  113   Int_t nValidationSamples = 0;
 
  118   if (fNumValidationString.EndsWith(
"%")) {
 
  123         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
 
  124         nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
 
  126         Log() << kFATAL << 
"Cannot parse number \"" << fNumValidationString
 
  127               << 
"\". Expected string like \"20%\" or \"20.0%\"." << 
Endl;
 
  129   } 
else if (fNumValidationString.IsFloat()) {
 
  130      Double_t valSizeAsDouble = fNumValidationString.Atof();
 
  132      if (valSizeAsDouble < 1.0) {
 
  134         nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
 
  137         nValidationSamples = valSizeAsDouble;
 
  140      Log() << kFATAL << 
"Cannot parse number \"" << fNumValidationString << 
"\". Expected string like \"0.2\" or \"100\"." 
  146   if (nValidationSamples < 0) {
 
  147      Log() << kFATAL << 
"Validation size \"" << fNumValidationString << 
"\" is negative." << 
Endl;
 
  150   if (nValidationSamples == 0) {
 
  151      Log() << kFATAL << 
"Validation size \"" << fNumValidationString << 
"\" is zero." << 
Endl;
 
  154   if (nValidationSamples >= (
Int_t)trainingSetSize) {
 
  155      Log() << kFATAL << 
"Validation size \"" << fNumValidationString
 
  156            << 
"\" is larger than or equal in size to training set (size=\"" << trainingSetSize << 
"\")." << 
Endl;
 
  159   return nValidationSamples;
 
  187      Log() << kINFO << 
"Setting up tf.keras" << 
Endl;
 
  189      Log() << kINFO << 
"Setting up keras with " << 
gSystem->
Getenv(
"KERAS_BACKEND") << 
" backend" << 
Endl;
 
  191   bool useTFBackend = 
kFALSE;
 
  192   bool kerasIsCompatible = 
kTRUE;
 
  193   bool kerasIsPresent = 
kFALSE;
 
  200         kerasIsPresent = 
kTRUE;
 
  201      if (kerasIsPresent) {
 
  204            useTFBackend = 
kTRUE;
 
  206            PyRunString(
"keras_major_version = int(keras.__version__.split('.')[0])");
 
  207            PyRunString(
"keras_minor_version = int(keras.__version__.split('.')[1])");
 
  208            PyObject *pyKerasMajorVersion = PyDict_GetItemString(
fLocalNS, 
"keras_major_version");
 
  209            PyObject *pyKerasMinorVersion = PyDict_GetItemString(
fLocalNS, 
"keras_minor_version");
 
  210            int kerasMajorVersion = PyLong_AsLong(pyKerasMajorVersion);
 
  211            int kerasMinorVersion = PyLong_AsLong(pyKerasMinorVersion);
 
  212            Log() << kINFO << 
"Using Keras version " << kerasMajorVersion << 
"." << kerasMinorVersion << 
Endl;
 
  217            kerasIsCompatible = (kerasMajorVersion >= 2 && kerasMinorVersion == 3);
 
  222         Log() << kINFO << 
"Keras is not found. Trying using tf.keras" << 
Endl;
 
  231      if (ret == 
nullptr) {
 
  232         Log() << kFATAL << 
"Importing TensorFlow failed" << 
Endl;
 
  235      PyRunString(
"tf_major_version = int(tf.__version__.split('.')[0])");
 
  237      int tfVersion = PyLong_AsLong(pyTfVersion);
 
  238      Log() << kINFO << 
"Using TensorFlow version " << tfVersion << 
Endl;
 
  242            Log() << kWARNING << 
"Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" << 
Endl;
 
  245            if (!kerasIsPresent) {
 
  246               Log() << kFATAL << 
"Keras is not present and not a suitable TensorFlow version is found " << 
Endl;
 
  253         if (!kerasIsCompatible) {
 
  254            Log() << kWARNING << 
"The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" << 
Endl;
 
  263         Log() << kINFO << 
"Use Keras version from TensorFlow : tf.keras" << 
Endl;
 
  269         Log() << kINFO << 
"Use TensorFlow as Keras backend" << 
Endl;
 
  271         PyRunString(
"from keras.backend import tensorflow_backend as K");
 
  277      TString configProto = (tfVersion >= 2) ? 
"tf.compat.v1.ConfigProto" : 
"tf.ConfigProto";
 
  278      TString session = (tfVersion >= 2) ? 
"tf.compat.v1.Session" : 
"tf.Session";
 
  282      if (num_threads > 0) {
 
  283         Log() << kINFO << 
"Setting the CPU number of threads =  " << num_threads << 
Endl;
 
  286            TString::Format(
"session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
 
  287                            configProto.
Data(), num_threads, num_threads));
 
  296         for (
int item = 0; item < optlist->
GetEntries(); ++item) {
 
  297            Log() << kINFO << 
"Applying GPU option:  gpu_options." << optlist->
At(item)->
GetName() << 
Endl;
 
  306         PyRunString(
"tf.compat.v1.keras.backend.set_session(sess)");
 
  313         Log() << kWARNING << 
"Cannot set the given " << 
fNumThreads << 
" threads when not using tensorflow as  backend" 
  316         Log() << kWARNING << 
"Cannot set the given GPU option " << 
fGpuOptions 
  317               << 
" when not using tensorflow as  backend" << 
Endl;
 
  328   Log() << kINFO << 
" Loading Keras Model " << 
Endl;
 
  340      TString errmsg = 
"Error executing the provided user code";
 
  343      PyRunString(
"print('custom objects for loading model : ',load_model_custom_objects)");
 
  348   if (loadTrainedModel) {
 
  356                     "', custom_objects=load_model_custom_objects)", 
"Failed to load Keras model from file: " + filenameLoadModel);
 
  358   Log() << kINFO << 
"Loaded model from file: " << filenameLoadModel << 
Endl;
 
  369   else Log() << kFATAL << 
"Selected analysis type is not implemented" << 
Endl;
 
  386      PyRunString(
"tf.compat.v1.disable_eager_execution()",
"Failed to disable eager execution");
 
  387      Log() << kINFO << 
"Disabled TF eager execution when evaluating model " << 
Endl;
 
  396      npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)
fNVars};
 
  397      PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (
void*)
fVals.data());
 
  403      npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)
fNOutputs};
 
  404      PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (
void*)
fOutput.data());
 
  419      Log() << kFATAL << 
"Python is not initialized" << 
Endl;
 
  424   PyRunString(
"import sys; sys.argv = ['']", 
"Set sys.argv failed");
 
  441   UInt_t nTrainingEvents = nAllEvents - nValEvents;
 
  443   Log() << kINFO << 
"Split TMVA training data in " << nTrainingEvents << 
" training events and " 
  444         << nValEvents << 
" validation events" << 
Endl;
 
  446   float* trainDataX = 
new float[nTrainingEvents*
fNVars];
 
  447   float* trainDataY = 
new float[nTrainingEvents*
fNOutputs];
 
  448   float* trainDataWeights = 
new float[nTrainingEvents];
 
  449   for (
UInt_t i=0; i<nTrainingEvents; i++) {
 
  453         trainDataX[j + i*
fNVars] = 
e->GetValue(j);
 
  466            trainDataY[j + i*
fNOutputs] = 
e->GetTarget(j);
 
  469      else Log() << kFATAL << 
"Can not fill target vector because analysis type is not known" << 
Endl;
 
  472      trainDataWeights[i] = 
e->GetWeight();
 
  475   npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)
fNVars};
 
  476   npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)
fNOutputs};
 
  477   npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};
 
  478   PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (
void*)trainDataX);
 
  479   PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (
void*)trainDataY);
 
  480   PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (
void*)trainDataWeights);
 
  483   PyDict_SetItemString(
fLocalNS, 
"trainWeights", (
PyObject*)pTrainDataWeights);
 
  493   float* valDataX = 
new float[nValEvents*
fNVars];
 
  494   float* valDataY = 
new float[nValEvents*
fNOutputs];
 
  495   float* valDataWeights = 
new float[nValEvents];
 
  497   for (
UInt_t i=0; i< nValEvents ; i++) {
 
  498      UInt_t ievt = nTrainingEvents + i; 
 
  502         valDataX[j + i*
fNVars] = 
e->GetValue(j);
 
  516      else Log() << kFATAL << 
"Can not fill target vector because analysis type is not known" << 
Endl;
 
  518      valDataWeights[i] = 
e->GetWeight();
 
  521   npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)
fNVars};
 
  522   npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)
fNOutputs};
 
  523   npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};
 
  524   PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (
void*)valDataX);
 
  525   PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (
void*)valDataY);
 
  526   PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (
void*)valDataWeights);
 
  534   Log() << kINFO << 
"Training Model Summary" << 
Endl;
 
  542   PyDict_SetItemString(
fLocalNS, 
"batchSize", pBatchSize);
 
  543   PyDict_SetItemString(
fLocalNS, 
"numEpochs", pNumEpochs);
 
  544   PyDict_SetItemString(
fLocalNS, 
"verbose", pVerbose);
 
  551      PyRunString(
"callbacks.append(" + 
fKerasString +
".callbacks.ModelCheckpoint('"+
fFilenameTrainedModel+
"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", 
"Failed to setup training callback: SaveBestOnly");
 
  552      Log() << kINFO << 
"Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << 
Endl;
 
  559      PyRunString(
"callbacks.append(" + 
fKerasString + 
".callbacks.EarlyStopping(monitor='val_loss', patience="+tries+
", verbose=verbose, mode='auto'))", 
"Failed to setup training callback: TriesEarlyStopping");
 
  560      Log() << kINFO << 
"Option TriesEarlyStopping: Training will stop after " << tries << 
" number of epochs with no improvement of validation loss" << 
Endl;
 
  567                  "schedulerSteps = {}\n" 
  568                  "for c in strScheduleSteps.split(';'):\n" 
  569                  "    x = c.split(',')\n" 
  570                  "    schedulerSteps[int(x[0])] = float(x[1])\n",
 
  574      PyRunString(
"def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n" 
  575                  "    if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n" 
  576                  "    else: return float(model.optimizer.lr.get_value())\n",
 
  581                  "Failed to setup training callback: LearningRateSchedule");
 
  589         "callbacks.append(" + 
fKerasString + 
".callbacks.TensorBoard(log_dir=" + logdir +
 
  590            ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
 
  591         "Failed to setup training callback: TensorBoard");
 
  592      Log() << kINFO << 
"Option TensorBoard: Log files for training monitoring are stored in: " << logdir << 
Endl;
 
  596   PyRunString(
"history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
 
  597               "Failed to train model");
 
  600   std::vector<float> fHistory; 
 
  602   npy_intp dimsHistory[1] = { (npy_intp)
fNumEpochs};
 
  603   PyArrayObject* pHistory = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsHistory, NPY_FLOAT, (
void*)&fHistory[0]);
 
  608   PyRunString(
"number_of_keys=len(history.history.keys())");
 
  610   int nkeys=PyLong_AsLong(PyNkeys);
 
  611   for (iHis=0; iHis<nkeys; iHis++) {
 
  616#if PY_MAJOR_VERSION < 3    
  622      PyObject* repr = PyObject_Repr(stra);
 
  623      PyObject* str = PyUnicode_AsEncodedString(repr, 
"utf-8", 
"~E~");
 
  627      Log() << kINFO << 
"Getting training history for item:" << iHis << 
" name = " << 
name << 
Endl;
 
  630      for (
size_t i=0; i<fHistory.size(); i++)
 
  653   delete[] trainDataWeights;
 
  656   delete[] valDataWeights;
 
  679                    + 
")): output[i]=p\n";
 
  695   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
 
  696   if (firstEvt < 0) firstEvt = 0;
 
  697   nEvents = lastEvt-firstEvt;
 
  706            << 
" sample (" << nEvents << 
" events)" << 
Endl;
 
  709   for (
UInt_t i=0; i<nEvents; i++) {
 
  717   std::vector<double> mvaValues(nEvents);
 
  718   npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)
fNVars};
 
  719   PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (
void*)
data);
 
  720   if (pDataMvaValues==0) 
Log() << 
"Failed to load data to Python array" << 
Endl;
 
  724   if (pModel==0) 
Log() << kFATAL << 
"Failed to get model Python object" << 
Endl;
 
  725   PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallMethod(pModel, (
char*)
"predict", (
char*)
"O", pDataMvaValues);
 
  726   if (pPredictions==0) 
Log() << kFATAL << 
"Failed to get predictions" << 
Endl;
 
  730   float* predictionsData = (
float*) PyArray_DATA(pPredictions);
 
  732   for (
UInt_t i=0; i<nEvents; i++) {
 
  738            << 
"Elapsed time for evaluation of " << nEvents <<  
" events: " 
  761                    + 
")): output[i]=p\n";
 
  791                    + 
")): output[i]=p\n";
 
  804   Log() << 
"Keras is a high-level API for the Theano and Tensorflow packages." << 
Endl;
 
  805   Log() << 
"This method wraps the training and predictions steps of the Keras" << 
Endl;
 
  806   Log() << 
"Python package for TMVA, so that dataloading, preprocessing and" << 
Endl;
 
  807   Log() << 
"evaluation can be done within the TMVA system. To use this Keras" << 
Endl;
 
  808   Log() << 
"interface, you have to generate a model with Keras first. Then," << 
Endl;
 
  809   Log() << 
"this model can be loaded and trained in TMVA." << 
Endl;
 
  820   PyRunString(
"keras_backend_is_set =  keras.backend.backend() == \"tensorflow\"");
 
  821   PyObject * keras_backend = PyDict_GetItemString(
fLocalNS,
"keras_backend_is_set");
 
  822   if (keras_backend  != 
nullptr && keras_backend == Py_True)
 
  825   PyRunString(
"keras_backend_is_set =  keras.backend.backend() == \"theano\"");
 
  826   keras_backend = PyDict_GetItemString(
fLocalNS,
"keras_backend_is_set");
 
  827   if (keras_backend != 
nullptr && keras_backend == Py_True)
 
  830   PyRunString(
"keras_backend_is_set =  keras.backend.backend() == \"cntk\"");
 
  831   keras_backend = PyDict_GetItemString(
fLocalNS,
"keras_backend_is_set");
 
  832   if (keras_backend != 
nullptr && keras_backend == Py_True)
 
#define REGISTER_METHOD(CLASS)
for example
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
 
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
 
R__EXTERN TSystem * gSystem
 
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
 
Class that contains all the data information.
 
UInt_t GetNClasses() const
 
UInt_t GetNTargets() const
 
Types::ETreeType GetCurrentType() const
 
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
 
Long64_t GetNTrainingEvents() const
 
void SetCurrentEvent(Long64_t ievt) const
 
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
 
Float_t GetTarget(UInt_t itgt) const
 
PyGILState_STATE m_GILState
 
const char * GetName() const
 
Types::EAnalysisType GetAnalysisType() const
 
const TString & GetWeightFileDir() const
 
const TString & GetMethodName() const
 
const Event * GetEvent() const
 
DataSetInfo & DataInfo() const
 
virtual void TestClassification()
initialization
 
UInt_t GetNVariables() const
 
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
 
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
 
TrainingHistory fTrainHistory
 
const Event * GetTrainingEvent(Long64_t ievt) const
 
void GetHelpMessage() const
 
void Init()
Initialization function called from MethodBase::SetupMethod(). Note that option strings are not yet fil...
 
std::vector< float > fOutput
 
virtual void TestClassification()
initialization
 
void ProcessOptions()
Function processing the options. This is called only when creating the method before training, not when...
 
Bool_t UseTFKeras() const
 
Int_t fTriesEarlyStopping
 
EBackendType
enumeration defining the used Keras backend
 
void SetupKerasModel(Bool_t loadTrainedModel)
 
std::vector< Float_t > & GetMulticlassValues()
 
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
 
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper)
 
void SetupKerasModelForEval()
Setting up the model for evaluation. Add here some needed optimizations, like disabling eager execution.
 
std::vector< Float_t > & GetRegressionValues()
 
bool fModelIsSetupForEval
 
TString fNumValidationString
 
std::vector< float > fVals
 
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
 
TString GetKerasBackendName()
 
MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
 
TString fLearningRateSchedule
 
EBackendType GetKerasBackend()
Get the Keras backend (can be: TensorFlow, Theano or CNTK)
 
TString fFilenameTrainedModel
 
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
get all the MVA values for the events of the current Data type
 
static int PyIsInitialized()
Check Python interpreter initialization status.
 
static PyObject * fGlobalNS
 
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=256)
Execute Python code from string.
 
Timing information for training and evaluation of MVA methods.
 
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
 
void AddValue(TString Property, Int_t stage, Double_t value)
 
Singleton class for Global types used by TMVA.
 
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
 
Int_t GetEntries() const override
Return the number of objects in array (i.e.
 
TObject * At(Int_t idx) const override
 
virtual const char * GetName() const
Returns name of object.
 
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
 
const char * Data() const
 
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
 
static TString Format(const char *fmt,...)
Static method which formats a string using a printf-style format descriptor and returns a TString.
 
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
 
virtual const char * Getenv(const char *env)
Get environment variable.
 
std::string ToString(const T &val)
Utility function for conversion to strings.
 
create variable transformations
 
MsgLogger & Endl(MsgLogger &ml)