doc/hackathon/MethodPyTorch_8cxx_source.html

// @(#)root/tmva/pymva $Id$

// Author: Anirudh Dagar, 2020


#include <Python.h>

#include "TMVA/MethodPyTorch.h"


#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION

#include <numpy/arrayobject.h>


#include "TMVA/Types.h"

#include "TMVA/Config.h"

#include "TMVA/ClassifierFactory.h"

#include "TMVA/Results.h"

#include "TMVA/TransformationHandler.h"

#include "TMVA/VariableTransformBase.h"

#include "TMVA/Tools.h"

#include "TMVA/Timer.h"


using namespace TMVA;


namespace TMVA {

namespace Internal {

class PyGILRAII {

   PyGILState_STATE m_GILState;


public:

   PyGILRAII() : m_GILState(PyGILState_Ensure()) {}

   ~PyGILRAII() { PyGILState_Release(m_GILState); }

};

} // namespace Internal

} // namespace TMVA


REGISTER_METHOD(PyTorch)


MethodPyTorch::MethodPyTorch(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption)

   : PyMethodBase(jobName, Types::kPyTorch, methodTitle, dsi, theOption) {

   fNumEpochs = 10;

   fBatchSize = 100;


   fContinueTraining = false;

   fSaveBestOnly = true;

   fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler

   fFilenameTrainedModel = ""; // empty string sets output model filename to default (in "weights/" directory.)

}


MethodPyTorch::MethodPyTorch(DataSetInfo &theData, const TString &theWeightFile)

    : PyMethodBase(Types::kPyTorch, theData, theWeightFile) {

   fNumEpochs = 10;

   fBatchSize = 100;


   fContinueTraining = false;

   fSaveBestOnly = true;

   fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler

   fFilenameTrainedModel = ""; // empty string sets output model filename to default (in "weights/" directory.)

}


MethodPyTorch::~MethodPyTorch() {

   if (fPyVals != nullptr) Py_DECREF(fPyVals);

   if (fPyOutput != nullptr) Py_DECREF(fPyOutput);

}


Bool_t MethodPyTorch::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) {

   if (type == Types::kRegression) return kTRUE;

   if (type == Types::kClassification && numberClasses == 2) return kTRUE;

   if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;

   return kFALSE;

}


void MethodPyTorch::DeclareOptions() {

   DeclareOptionRef(fFilenameModel, "FilenameModel", "Filename of the initial PyTorch model");

   DeclareOptionRef(fFilenameTrainedModel, "FilenameTrainedModel", "Filename of the trained output PyTorch model");

   DeclareOptionRef(fBatchSize, "BatchSize", "Training batch size");

   DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");


   DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");

   DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");

   DeclareOptionRef(fLearningRateSchedule, "LearningRateSchedule", "Set new learning rate during training at specific epochs, e.g., \"50,0.01;70,0.005\"");


   DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation."

                    "Specify as 0.2 or 20% to use a fifth of the data set as validation set."

                    "Specify as 100 to use exactly 100 events. (Default: 20%)");

   DeclareOptionRef(fUserCodeName = "", "UserCode", "Necessary python code provided by the user to be executed before loading and training the PyTorch Model");


}


////////////////////////////////////////////////////////////////////////////////

/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and

/// 100 etc.

///    - 20% and 0.2 selects 20% of the training set as validation data.

///    - 100 selects 100 events as the validation data.

///

/// @return number of samples in validation set

///


UInt_t TMVA::MethodPyTorch::GetNumValidationSamples()

{

   Int_t nValidationSamples = 0;

   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();


   // Parsing + Validation

   // --------------------

   if (fNumValidationString.EndsWith("%")) {

      // Relative spec. format 20%

      TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));


      if (intValStr.IsFloat()) {

         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;

         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;

      } else {

         Log() << kFATAL << "Cannot parse number \"" << fNumValidationString

               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;

      }

   } else if (fNumValidationString.IsFloat()) {

      Double_t valSizeAsDouble = fNumValidationString.Atof();


      if (valSizeAsDouble < 1.0) {

         // Relative spec. format 0.2

         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;

      } else {

         // Absolute spec format 100 or 100.0

         nValidationSamples = valSizeAsDouble;

      }

   } else {

      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."

            << Endl;

   }


   // Value validation

   // ----------------

   if (nValidationSamples < 0) {

      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;

   }


   if (nValidationSamples == 0) {

      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;

   }


   if (nValidationSamples >= (Int_t)trainingSetSize) {

      Log() << kFATAL << "Validation size \"" << fNumValidationString

            << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;

   }


   return nValidationSamples;

}


void MethodPyTorch::ProcessOptions() {

   // Set default filename for trained model if option is not used

   if (fFilenameTrainedModel.IsNull()) {

      fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".pt";

   }


   //  -  set up number of threads for CPU if NumThreads option was specified

   //     `torch.set_num_threads` sets the number of threads that can be used to

   //     perform cpu operations like conv or mm (usually used by OpenMP or MKL).


   Log() << kINFO << "Using PyTorch - setting special configuration options "  << Endl;

   PyRunString("import torch", "Error importing pytorch");


   // run these above lines also in global namespace to make them visible overall

   PyRun_String("import torch", Py_single_input, fGlobalNS, fGlobalNS);


   // check pytorch version

   PyRunString("torch_major_version = int(torch.__version__.split('.')[0])");

   PyObject *pyTorchVersion = PyDict_GetItemString(fLocalNS, "torch_major_version");

   int torchVersion = PyLong_AsLong(pyTorchVersion);

   Log() << kINFO << "Using PyTorch version " << torchVersion << Endl;


   // in case specify number of threads

   int num_threads = fNumThreads;

   if (num_threads > 0) {

      Log() << kINFO << "Setting the CPU number of threads =  "  << num_threads << Endl;


      PyRunString(TString::Format("torch.set_num_threads(%d)", num_threads));

      PyRunString(TString::Format("torch.set_num_interop_threads(%d)", num_threads));

   }


   // Setup model, either the initial model from `fFilenameModel` or

   // the trained model from `fFilenameTrainedModel`

   if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;

   SetupPyTorchModel(fContinueTraining);

}


void MethodPyTorch::SetupPyTorchModel(bool loadTrainedModel) {

   /*

    * Load PyTorch model from file

    */


   Log() << kINFO << " Setup PyTorch Model for training" << Endl;


   if (!fUserCodeName.IsNull()) {

      Log() << kINFO << " Executing user initialization code from  " << fUserCodeName << Endl;


      // run some python code provided by user for method initializations

      FILE* fp;

      fp = fopen(fUserCodeName, "r");

      if (fp) {

         PyRun_SimpleFile(fp, fUserCodeName);

         fclose(fp);

      }

      else

         Log() << kFATAL << "Input user code is not existing : " << fUserCodeName << Endl;

   }


   PyRunString("print('custom objects for loading model : ',load_model_custom_objects)");


   // Setup the training method

   PyRunString("fit = load_model_custom_objects[\"train_func\"]",

               "Failed to load train function from file. Please use key: 'train_func' and pass training loop function as the value.");

   Log() << kINFO << "Loaded pytorch train function: " << Endl;


   // Setup Optimizer. Use SGD Optimizer as Default

   PyRunString("if 'optimizer' in load_model_custom_objects:\n"

               "    optimizer = load_model_custom_objects['optimizer']\n"

               "else:\n"

               "    optimizer = torch.optim.SGD\n",

               "Please use key: 'optimizer' and pass a pytorch optimizer as the value for a custom optimizer.");

   Log() << kINFO << "Loaded pytorch optimizer: " << Endl;


   // Setup the loss criterion

   PyRunString("criterion = load_model_custom_objects[\"criterion\"]",

               "Failed to load loss function from file. Using MSE Loss as default. Please use key: 'criterion' and pass a pytorch loss function as the value.");

   Log() << kINFO << "Loaded pytorch loss function: " << Endl;


   // Setup the predict method

   PyRunString("predict = load_model_custom_objects[\"predict_func\"]",

               "Can't find user predict function object from file. Please use key: 'predict' and pass a predict function for evaluating the model as the value.");

   Log() << kINFO << "Loaded pytorch predict function: " << Endl;


   // Load already trained model or initial model

   TString filenameLoadModel;

   if (loadTrainedModel) {

      filenameLoadModel = fFilenameTrainedModel;

   }

   else {

      filenameLoadModel = fFilenameModel;

   }

   PyRunString("model = torch.jit.load('"+filenameLoadModel+"')",

               "Failed to load PyTorch model from file: "+filenameLoadModel);

   Log() << kINFO << "Loaded model from file: " << filenameLoadModel << Endl;


   // Get variables, classes and target numbers

   fNVars = GetNVariables();

   if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass)

      fNOutputs = DataInfo().GetNClasses();

   else if (GetAnalysisType() == Types::kRegression)

      fNOutputs = DataInfo().GetNTargets();

   else

      Log() << kFATAL << "Selected analysis type is not implemented" << Endl;


   if (fNVars == 0 || fNOutputs == 0) {

       Log() << kERROR << "Model does not have a number of inputs or output. Setup failed" << Endl;

       fModelIsSetup = false;

   }

   else {

      // Mark the model as setup

      fModelIsSetup = true;

   }

}


void MethodPyTorch::InitEvaluation(size_t nEvents)

{

   // initialize python arays used in the model evaluation (prediction)

   size_t inputSize = fNVars*nEvents;

   size_t outputSize = fNOutputs*nEvents;


   // Init evaluation by allocating the array with the right size


   if (inputSize > 0 && (fVals.size() != inputSize || fPyVals == nullptr)) {

      fVals.resize(inputSize);

      npy_intp dimsVals[2] = {(npy_intp)nEvents, (npy_intp)fNVars};

      if (fPyVals != nullptr) Py_DECREF(fPyVals);   // delete previous object

      fPyVals = PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (void*)fVals.data());

      if (!fPyVals)

         Log() << kFATAL << "Failed to load data to Python array" << Endl;

      PyDict_SetItemString(fLocalNS, "vals", fPyVals);

   }


   if (outputSize > 0 && ( fOutput.size() != outputSize || fPyOutput == nullptr)) {

      fOutput.resize(outputSize); // holds classification probabilities or regression output

      // allocation of Python output array is needed only for single event evaluation

      if (nEvents == 1) {

         npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)fNOutputs};

         if (fPyOutput != nullptr) Py_DECREF(fPyOutput);   // delete previous object

         fPyOutput = PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (void*)fOutput.data());

         if (!fPyOutput)

               Log() << kFATAL << "Failed to create output data Python array" << Endl;

         PyDict_SetItemString(fLocalNS, "output", fPyOutput);

      }

   }

}


void MethodPyTorch::Init() {


   TMVA::Internal::PyGILRAII raii;


   if (!PyIsInitialized()) {

      Log() << kFATAL << "Python is not initialized" << Endl;

   }

   _import_array(); // required to use numpy arrays


   // Import PyTorch

   PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");

   PyRunString("import torch", "import PyTorch failed");

   // do import also in global namespace

   auto ret = PyRun_String("import torch", Py_single_input, fGlobalNS, fGlobalNS);

   if (!ret)

      Log() << kFATAL << "import torch in global namespace failed!" << Endl;


   // Set flag that model is not setup

   fModelIsSetup = false;

}


void MethodPyTorch::Train() {

   if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;


   /*

    * Load training data to numpy array.

    * NOTE: These are later forced to be converted into torch tensors throught the training loop which may not be the ideal method.

    */


   UInt_t nAllEvents = Data()->GetNTrainingEvents();

   UInt_t nValEvents = GetNumValidationSamples();

   UInt_t nTrainingEvents = nAllEvents - nValEvents;


   Log() << kINFO << "Split TMVA training data in " << nTrainingEvents << " training events and "

         << nValEvents << " validation events" << Endl;


   float* trainDataX = new float[nTrainingEvents*fNVars];

   float* trainDataY = new float[nTrainingEvents*fNOutputs];

   float* trainDataWeights = new float[nTrainingEvents];

   for (UInt_t i=0; i<nTrainingEvents; i++) {

      const TMVA::Event* e = GetTrainingEvent(i);

      // Fill variables

      for (UInt_t j=0; j<fNVars; j++) {

         trainDataX[j + i*fNVars] = e->GetValue(j);

      }

      // Fill targets

      // NOTE: For classification, convert class number in one-hot vector,

      // e.g., 1 -> [0, 1] or 0 -> [1, 0] for binary classification

      if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {

         for (UInt_t j=0; j<fNOutputs; j++) {

            trainDataY[j + i*fNOutputs] = 0;

         }

         trainDataY[e->GetClass() + i*fNOutputs] = 1;

      }

      else if (GetAnalysisType() == Types::kRegression) {

         for (UInt_t j=0; j<fNOutputs; j++) {

            trainDataY[j + i*fNOutputs] = e->GetTarget(j);

         }

      }

      else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;

      // Fill weights

      // NOTE: If no weight branch is given, this defaults to ones for all events

      trainDataWeights[i] = e->GetWeight();

   }


   npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNVars};

   npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNOutputs};

   npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};

   PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (void*)trainDataX);

   PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (void*)trainDataY);

   PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (void*)trainDataWeights);

   PyDict_SetItemString(fLocalNS, "trainX", (PyObject*)pTrainDataX);

   PyDict_SetItemString(fLocalNS, "trainY", (PyObject*)pTrainDataY);

   PyDict_SetItemString(fLocalNS, "trainWeights", (PyObject*)pTrainDataWeights);


   /*

    * Load validation data to numpy array

    */


   // NOTE: TMVA Validation data is a subset of all the training data

   // we will not use test data for validation. They will be used for the real testing


   float* valDataX = new float[nValEvents*fNVars];

   float* valDataY = new float[nValEvents*fNOutputs];

   float* valDataWeights = new float[nValEvents];

   //validation events follows the trainig one in the TMVA training vector

   for (UInt_t i=0; i< nValEvents ; i++) {

      UInt_t ievt = nTrainingEvents + i; // TMVA event index

      const TMVA::Event* e = GetTrainingEvent(ievt);

      // Fill variables

      for (UInt_t j=0; j<fNVars; j++) {

         valDataX[j + i*fNVars] = e->GetValue(j);

      }

      // Fill targets

      if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {

         for (UInt_t j=0; j<fNOutputs; j++) {

            valDataY[j + i*fNOutputs] = 0;

         }

         valDataY[e->GetClass() + i*fNOutputs] = 1;

      }

      else if (GetAnalysisType() == Types::kRegression) {

         for (UInt_t j=0; j<fNOutputs; j++) {

            valDataY[j + i*fNOutputs] = e->GetTarget(j);

         }

      }

      else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;

      // Fill weights

      valDataWeights[i] = e->GetWeight();

   }


   npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)fNVars};

   npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)fNOutputs};

   npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};

   PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (void*)valDataX);

   PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (void*)valDataY);

   PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (void*)valDataWeights);

   PyDict_SetItemString(fLocalNS, "valX", (PyObject*)pValDataX);

   PyDict_SetItemString(fLocalNS, "valY", (PyObject*)pValDataY);

   PyDict_SetItemString(fLocalNS, "valWeights", (PyObject*)pValDataWeights);


   /*

    * Train PyTorch model

    */

   Log() << kINFO << "Print Training Model Architecture" << Endl;

   PyRunString("print(model)");


   // Setup parameters


   PyObject* pBatchSize = PyLong_FromLong(fBatchSize);

   PyObject* pNumEpochs = PyLong_FromLong(fNumEpochs);

   PyDict_SetItemString(fLocalNS, "batchSize", pBatchSize);

   PyDict_SetItemString(fLocalNS, "numEpochs", pNumEpochs);


   // Prepare PyTorch Training DataSet

   PyRunString("train_dataset = torch.utils.data.TensorDataset(torch.Tensor(trainX), torch.Tensor(trainY))",

               "Failed to create pytorch train Dataset.");

   // Prepare PyTorch Training Dataloader

   PyRunString("train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batchSize, shuffle=False)",

               "Failed to create pytorch train Dataloader.");


   // Prepare PyTorch Validation DataSet

   PyRunString("val_dataset = torch.utils.data.TensorDataset(torch.Tensor(valX), torch.Tensor(valY))",

               "Failed to create pytorch validation Dataset.");

   // Prepare PyTorch validation Dataloader

   PyRunString("val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batchSize, shuffle=False)",

               "Failed to create pytorch validation Dataloader.");


   // Learning Rate Scheduler

   if (fLearningRateSchedule!="") {

      // Setup a python dictionary with the desired learning rate steps

      PyRunString("strScheduleSteps = '"+fLearningRateSchedule+"'\n"

                  "schedulerSteps = {}\n"

                  "for c in strScheduleSteps.split(';'):\n"

                  "    x = c.split(',')\n"

                  "    schedulerSteps[int(x[0])] = float(x[1])\n",

                  "Failed to setup steps for scheduler function from string: "+fLearningRateSchedule,

                   Py_file_input);

      // Set scheduler function as piecewise function with given steps

      PyRunString("def schedule(optimizer, epoch, schedulerSteps=schedulerSteps):\n"

                  "    if epoch in schedulerSteps:\n"

                  "        for param_group in optimizer.param_groups:\n"

                  "            param_group['lr'] = float(schedulerSteps[epoch])\n",

                  "Failed to setup scheduler function with string: "+fLearningRateSchedule,

                  Py_file_input);


      Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;

   }

   else{

      PyRunString("schedule = None; schedulerSteps = None", "Failed to set scheduler to None.");

   }


   // Save only weights with smallest validation loss

   if (fSaveBestOnly) {

      PyRunString("def save_best(model, curr_val, best_val, save_path='"+fFilenameTrainedModel+"'):\n"

                  "    if curr_val<=best_val:\n"

                  "        best_val = curr_val\n"

                  "        best_model_jitted = torch.jit.script(model)\n"

                  "        torch.jit.save(best_model_jitted, save_path)\n"

                  "    return best_val",

                  "Failed to setup training with option: SaveBestOnly");

      Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;

   }

   else{

      PyRunString("save_best = None", "Failed to set save_best to None.");

   }


   // Note: Early Stopping should not be implemented here. Can be implemented inside train loop function by user if required.


   // Train model

   PyRunString("trained_model = fit(model, train_loader, val_loader, num_epochs=numEpochs, batch_size=batchSize,"

               "optimizer=optimizer, criterion=criterion, save_best=save_best, scheduler=(schedule, schedulerSteps))",

               "Failed to train model");


   // Note: PyTorch doesn't store training history data unlike Keras. A user can append and save the loss,

   // accuracy, other metrics etc to a file for later use.


   /*

    * Store trained model to file (only if option 'SaveBestOnly' is NOT activated,

    * because we do not want to override the best model checkpoint)

    */

   if (!fSaveBestOnly) {

      PyRunString("trained_model_jitted = torch.jit.script(trained_model)",

                  "Model not scriptable. Failed to convert to torch script.");

      PyRunString("torch.jit.save(trained_model_jitted, '"+fFilenameTrainedModel+"')",

                  "Failed to save trained model: "+fFilenameTrainedModel);

      Log() << kINFO << "Trained model written to file: " << fFilenameTrainedModel << Endl;

   }


   /*

    * Clean-up

    */


   delete[] trainDataX;

   delete[] trainDataY;

   delete[] trainDataWeights;

   delete[] valDataX;

   delete[] valDataY;

   delete[] valDataWeights;

}


void MethodPyTorch::TestClassification() {

    MethodBase::TestClassification();

}


Double_t MethodPyTorch::GetMvaValue(Double_t *errLower, Double_t *errUpper) {

   // Cannot determine error

   NoErrorCalc(errLower, errUpper);


   // Check whether the model is setup

   // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again

   if (!fModelIsSetup) {

      // Setup the trained model

      SetupPyTorchModel(true);

   }

   InitEvaluation(1);


   // Get signal probability (called mvaValue here)

   const TMVA::Event* e = GetEvent();

   for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);

   PyRunString("for i,p in enumerate(predict(model, vals)): output[i]=p\n",

               "Failed to get predictions");


   return fOutput[TMVA::Types::kSignal];

}


std::vector<Double_t> MethodPyTorch::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t /* logProgress */) {

   // Check whether the model is setup

   // NOTE: Unfortunately this is needed because during evaluation ProcessOptions is not called again

   if (!fModelIsSetup) {

      // Setup the trained model

      SetupPyTorchModel(true);

   }


   // Load data to numpy array

   Long64_t nEvents = Data()->GetNEvents();

   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;

   if (firstEvt < 0) firstEvt = 0;

   nEvents = lastEvt-firstEvt;


   InitEvaluation(nEvents);


   assert (static_cast<Long64_t>(fVals.size()) == fNVars*nEvents);

   for (UInt_t i=0; i<nEvents; i++) {

      Data()->SetCurrentEvent(i);

      const TMVA::Event *e = GetEvent();

      for (UInt_t j=0; j<fNVars; j++) {

         fVals[j + i*fNVars] = e->GetValue(j);

      }

   }


   // Get prediction for all events

   PyObject* pModel = PyDict_GetItemString(fLocalNS, "model");

   if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;


   PyObject* pPredict = PyDict_GetItemString(fLocalNS, "predict");

   if (pPredict==0) Log() << kFATAL << "Failed to get Python predict function" << Endl;


   // Using PyTorch User Defined predict function for predictions

   PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallFunctionObjArgs(pPredict, pModel, fPyVals, NULL);

   if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;


   // Load predictions to double vector

   // NOTE: The signal probability is given at the output

   std::vector<double> mvaValues(nEvents);

   float* predictionsData = (float*) PyArray_DATA(pPredictions);

   for (UInt_t i=0; i<nEvents; i++) {

      mvaValues[i] = (double) predictionsData[i*fNOutputs + TMVA::Types::kSignal];

   }


   Py_DECREF(pPredictions);


   return mvaValues;

}


std::vector<Float_t>& MethodPyTorch::GetRegressionValues() {

   // Check whether the model is setup

   // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again

   if (!fModelIsSetup){

      // Setup the model and load weights

      SetupPyTorchModel(true);

   }

   InitEvaluation(1);


   // Get regression values

   const TMVA::Event* e = GetEvent();

   for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);


   PyRunString("for i,p in enumerate(predict(model, vals)): output[i]=p\n",

               "Failed to get predictions");


   // Use inverse transformation of targets to get final regression values

   Event * eTrans = new Event(*e);

   for (UInt_t i=0; i<fNOutputs; ++i) {

      eTrans->SetTarget(i,fOutput[i]);

   }


   const Event* eTrans2 = GetTransformationHandler().InverseTransform(eTrans);

   for (UInt_t i=0; i<fNOutputs; ++i) {

      fOutput[i] = eTrans2->GetTarget(i);

   }


   return fOutput;

}


std::vector<Float_t> MethodPyTorch::GetAllRegressionValues() {


   if (!fModelIsSetup){

      // Setup the model and load weights

      SetupPyTorchModel(true);

   }


   auto nEvents = Data()->GetNEvents();

   InitEvaluation(nEvents);


   assert (static_cast<Long64_t>(fVals.size()) == fNVars*nEvents);

   assert (static_cast<Long64_t>(fOutput.size()) == fNOutputs*nEvents);

   for (UInt_t i=0; i<nEvents; i++) {

      Data()->SetCurrentEvent(i);

      const TMVA::Event *e = GetEvent();

      for (UInt_t j=0; j<fNVars; j++) {

         fVals[j + i*fNVars] = e->GetValue(j);

      }

   }


   // Get prediction for all events

   PyObject* pModel = PyDict_GetItemString(fLocalNS, "model");

   if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;


   PyObject* pPredict = PyDict_GetItemString(fLocalNS, "predict");

   if (pPredict==0) Log() << kFATAL << "Failed to get Python predict function" << Endl;


   std::cout << " calling predict functon for regression \n";

   // Using PyTorch User Defined predict function for predictions

   PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallFunctionObjArgs(pPredict, pModel, fPyVals, NULL);

   if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;


   // Load predictions to double vector

   float* predictionsData = (float*) PyArray_DATA(pPredictions);


   // need to loop on events since we use an inverse transformation to get final regression values

   // this can be probably optimized

   for (UInt_t ievt = 0; ievt < nEvents; ievt++) {

      const TMVA::Event* e = GetEvent(ievt);

      Event eTrans(*e);

      for (UInt_t i = 0; i < fNOutputs; ++i) {

         eTrans.SetTarget(i,predictionsData[ievt*fNOutputs + i]);

      }

      // apply the inverse transformation

      const Event* eTrans2 = GetTransformationHandler().InverseTransform(&eTrans);

      for (UInt_t i = 0; i < fNOutputs; ++i) {

         fOutput[ievt*fNOutputs + i] = eTrans2->GetTarget(i);

      }

   }

   Py_DECREF(pPredictions);


   return fOutput;

}


std::vector<Float_t>& MethodPyTorch::GetMulticlassValues() {

   // Check whether the model is setup

   // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again

   if (!fModelIsSetup){

      // Setup the model and load weights

      SetupPyTorchModel(true);

   }

   InitEvaluation(1);


   // Get class probabilites

   const TMVA::Event* e = GetEvent();

   for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);

   PyRunString("for i,p in enumerate(predict(model, vals)): output[i]=p\n",

               "Failed to get predictions");


   return fOutput;

}


std::vector<Float_t> MethodPyTorch::GetAllMulticlassValues() {

   // Check whether the model is setup

   if (!fModelIsSetup){

      // Setup the model and load weights

      SetupPyTorchModel(true);

   }

   auto nEvents = Data()->GetNEvents();

   InitEvaluation(nEvents);


   assert (static_cast<Long64_t>(fVals.size()) == fNVars*nEvents);

   assert (static_cast<Long64_t>(fOutput.size()) == fNOutputs*nEvents);

   for (UInt_t i=0; i<nEvents; i++) {

      Data()->SetCurrentEvent(i);

      const TMVA::Event *e = GetEvent();

      for (UInt_t j=0; j<fNVars; j++) {

         fVals[j + i*fNVars] = e->GetValue(j);

      }

   }


   // Get prediction for all events

   PyObject* pModel = PyDict_GetItemString(fLocalNS, "model");

   if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;


   PyObject* pPredict = PyDict_GetItemString(fLocalNS, "predict");

   if (pPredict==0) Log() << kFATAL << "Failed to get Python predict function" << Endl;


   // Using PyTorch User Defined predict function for predictions

   PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallFunctionObjArgs(pPredict, pModel, fPyVals, NULL);

   if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;


   // Load predictions to double vector

   float* predictionsData = (float*) PyArray_DATA(pPredictions);


   std::copy(predictionsData, predictionsData+nEvents*fNOutputs, fOutput.begin());


   Py_DECREF(pPredictions);


   return fOutput;

}


void MethodPyTorch::ReadModelFromFile() {

}


void MethodPyTorch::GetHelpMessage() const {

   Log() << Endl;

   Log() << "PyTorch is a scientific computing package supporting" << Endl;

   Log() << "automatic differentiation. This method wraps the training" << Endl;

   Log() << "and predictions steps of the PyTorch Python package for" << Endl;

   Log() << "TMVA, so that dataloading, preprocessing and evaluation" << Endl;

   Log() << "can be done within the TMVA system. To use this PyTorch" << Endl;

   Log() << "interface, you need to generatea model with PyTorch first." << Endl;

   Log() << "Then, this model can be loaded and trained in TMVA." << Endl;

   Log() << Endl;

}


ClassifierFactory.h

REGISTER_METHOD
#define REGISTER_METHOD(CLASS)
for example
Definition ClassifierFactory.h:124

MethodPyTorch.h

PyObject
_object PyObject
Definition PyMethodBase.h:36

e
#define e(i)
Definition RSha256.hxx:103

Results.h

ret
char * ret
Definition Rotated.cxx:221

Int_t
int Int_t
Signed integer 4 bytes (int).
Definition RtypesCore.h:59

Bool_t
bool Bool_t
Boolean (0=false, 1=true) (bool).
Definition RtypesCore.h:77

kFALSE
constexpr Bool_t kFALSE
Definition RtypesCore.h:108

Double_t
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73

kTRUE
constexpr Bool_t kTRUE
Definition RtypesCore.h:107

Timer.h

Tools.h

TransformationHandler.h

VariableTransformBase.h

Long64_t

TMVA::Configurable::DeclareOptionRef
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")

TMVA::Configurable::Log
MsgLogger & Log() const
Definition Configurable.h:122

TMVA::DataSetInfo
Class that contains all the data information.
Definition DataSetInfo.h:62

TMVA::DataSetInfo::GetNClasses
UInt_t GetNClasses() const
Definition DataSetInfo.h:155

TMVA::DataSetInfo::GetNTargets
UInt_t GetNTargets() const
Definition DataSetInfo.h:128

TMVA::DataSet::GetNEvents
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:206

TMVA::DataSet::GetNTrainingEvents
Long64_t GetNTrainingEvents() const
Definition DataSet.h:68

TMVA::DataSet::SetCurrentEvent
void SetCurrentEvent(Long64_t ievt) const
Definition DataSet.h:88

TMVA::Event
Definition Event.h:51

TMVA::Event::SetTarget
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition Event.cxx:367

TMVA::Event::GetTarget
Float_t GetTarget(UInt_t itgt) const
Definition Event.h:102

TMVA::Internal::PyGILRAII
Definition MethodPyAdaBoost.cxx:47

TMVA::Internal::PyGILRAII::m_GILState
PyGILState_STATE m_GILState
Definition MethodPyAdaBoost.cxx:48

TMVA::Internal::PyGILRAII::~PyGILRAII
~PyGILRAII()
Definition MethodPyTorch.cxx:28

TMVA::Internal::PyGILRAII::PyGILRAII
PyGILRAII()
Definition MethodPyTorch.cxx:27

TMVA::MethodBase::GetName
const char * GetName() const override
Definition MethodBase.h:337

TMVA::MethodBase::GetAnalysisType
Types::EAnalysisType GetAnalysisType() const
Definition MethodBase.h:440

TMVA::MethodBase::GetEventCollection
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
Definition MethodBase.cxx:3418

TMVA::MethodBase::GetWeightFileDir
const TString & GetWeightFileDir() const
Definition MethodBase.h:495

TMVA::MethodBase::GetEvent
const Event * GetEvent() const
Definition MethodBase.h:754

TMVA::MethodBase::DataInfo
DataSetInfo & DataInfo() const
Definition MethodBase.h:413

TMVA::MethodBase::TestClassification
virtual void TestClassification()
initialization
Definition MethodBase.cxx:1201

TMVA::MethodBase::GetNVariables
UInt_t GetNVariables() const
Definition MethodBase.h:348

TMVA::MethodBase::GetTransformationHandler
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition MethodBase.h:397

TMVA::MethodBase::NoErrorCalc
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition MethodBase.cxx:900

TMVA::MethodBase::Data
DataSet * Data() const
Definition MethodBase.h:412

TMVA::MethodBase::GetTrainingEvent
const Event * GetTrainingEvent(Long64_t ievt) const
Definition MethodBase.h:774

TMVA::MethodPyTorch::InitEvaluation
void InitEvaluation(size_t nEvents)
Definition MethodPyTorch.cxx:272

TMVA::MethodPyTorch::GetHelpMessage
void GetHelpMessage() const override
Definition MethodPyTorch.cxx:761

TMVA::MethodPyTorch::~MethodPyTorch
~MethodPyTorch()
Definition MethodPyTorch.cxx:61

TMVA::MethodPyTorch::fModelIsSetup
bool fModelIsSetup
Definition MethodPyTorch.h:93

TMVA::MethodPyTorch::fNOutputs
UInt_t fNOutputs
Definition MethodPyTorch.h:97

TMVA::MethodPyTorch::Train
void Train() override
Definition MethodPyTorch.cxx:327

TMVA::MethodPyTorch::Init
void Init() override
Definition MethodPyTorch.cxx:305

TMVA::MethodPyTorch::fNumThreads
Int_t fNumThreads
Definition MethodPyTorch.h:83

TMVA::MethodPyTorch::fSaveBestOnly
Bool_t fSaveBestOnly
Definition MethodPyTorch.h:86

TMVA::MethodPyTorch::GetMvaValue
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper) override
Definition MethodPyTorch.cxx:537

TMVA::MethodPyTorch::GetRegressionValues
std::vector< Float_t > & GetRegressionValues() override
Definition MethodPyTorch.cxx:611

TMVA::MethodPyTorch::ProcessOptions
void ProcessOptions() override
Definition MethodPyTorch.cxx:153

TMVA::MethodPyTorch::HasAnalysisType
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) override
Definition MethodPyTorch.cxx:67

TMVA::MethodPyTorch::fNVars
UInt_t fNVars
Definition MethodPyTorch.h:96

TMVA::MethodPyTorch::fOutput
std::vector< float > fOutput
Definition MethodPyTorch.h:95

TMVA::MethodPyTorch::ReadModelFromFile
void ReadModelFromFile() override
Definition MethodPyTorch.cxx:757

TMVA::MethodPyTorch::MethodPyTorch
MethodPyTorch(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
Definition MethodPyTorch.cxx:37

TMVA::MethodPyTorch::GetAllMulticlassValues
std::vector< Float_t > GetAllMulticlassValues() override
Get all multi-class values.
Definition MethodPyTorch.cxx:715

TMVA::MethodPyTorch::fUserCodeName
TString fUserCodeName
Definition MethodPyTorch.h:91

TMVA::MethodPyTorch::GetMvaValues
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) override
get all the MVA values for the events of the current Data type
Definition MethodPyTorch.cxx:560

TMVA::MethodPyTorch::fVals
std::vector< float > fVals
Definition MethodPyTorch.h:94

TMVA::MethodPyTorch::GetMulticlassValues
std::vector< Float_t > & GetMulticlassValues() override
Definition MethodPyTorch.cxx:697

TMVA::MethodPyTorch::GetAllRegressionValues
std::vector< Float_t > GetAllRegressionValues() override
Get al regression values in one call.
Definition MethodPyTorch.cxx:642

TMVA::MethodPyTorch::TestClassification
void TestClassification() override
initialization
Definition MethodPyTorch.cxx:533

TMVA::MethodPyTorch::fNumValidationString
TString fNumValidationString
Definition MethodPyTorch.h:89

TMVA::MethodPyTorch::fPyVals
PyObject * fPyVals
Definition MethodPyTorch.h:99

TMVA::MethodPyTorch::fNumEpochs
UInt_t fNumEpochs
Definition MethodPyTorch.h:82

TMVA::MethodPyTorch::GetNumValidationSamples
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
Definition MethodPyTorch.cxx:101

TMVA::MethodPyTorch::fPyOutput
PyObject * fPyOutput
Definition MethodPyTorch.h:100

TMVA::MethodPyTorch::fLearningRateSchedule
TString fLearningRateSchedule
Definition MethodPyTorch.h:87

TMVA::MethodPyTorch::fFilenameModel
TString fFilenameModel
Definition MethodPyTorch.h:80

TMVA::MethodPyTorch::fFilenameTrainedModel
TString fFilenameTrainedModel
Definition MethodPyTorch.h:98

TMVA::MethodPyTorch::SetupPyTorchModel
void SetupPyTorchModel(Bool_t loadTrainedModel)
Definition MethodPyTorch.cxx:191

TMVA::MethodPyTorch::DeclareOptions
void DeclareOptions() override
Definition MethodPyTorch.cxx:75

TMVA::MethodPyTorch::fContinueTraining
Bool_t fContinueTraining
Definition MethodPyTorch.h:85

TMVA::MethodPyTorch::fBatchSize
UInt_t fBatchSize
Definition MethodPyTorch.h:81

TMVA::PyMethodBase::PyIsInitialized
static int PyIsInitialized()
Check Python interpreter initialization status.
Definition PyMethodBase.cxx:252

TMVA::PyMethodBase::fGlobalNS
static PyObject * fGlobalNS
Definition PyMethodBase.h:126

TMVA::PyMethodBase::PyMethodBase
PyMethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
Definition PyMethodBase.cxx:83

TMVA::PyMethodBase::PyRunString
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=256)
Execute Python code from string.
Definition PyMethodBase.cxx:324

TMVA::PyMethodBase::fLocalNS
PyObject * fLocalNS
Definition PyMethodBase.h:127

TMVA::TransformationHandler::InverseTransform
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
Definition TransformationHandler.cxx:167

TMVA::Types
Singleton class for Global types used by TMVA.
Definition Types.h:71

TMVA::Types::kSignal
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Definition Types.h:135

TMVA::Types::EAnalysisType
EAnalysisType
Definition Types.h:126

TMVA::Types::kMulticlass
@ kMulticlass
Definition Types.h:129

TMVA::Types::kClassification
@ kClassification
Definition Types.h:127

TMVA::Types::kRegression
@ kRegression
Definition Types.h:128

TMVA::Types::kTraining
@ kTraining
Definition Types.h:143

TString
Basic string class.
Definition TString.h:138

TString::IsFloat
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition TString.cxx:1864

TString::kTrailing
@ kTrailing
Definition TString.h:284

TString::Format
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2385

double

TMVA::Internal
Definition MethodPyAdaBoost.cxx:46

TMVA
create variable transformations
Definition GeneticMinimizer.h:22

TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148

Config.h

Types.h