7#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 
    8#include <numpy/arrayobject.h> 
   85                    "Specify as 0.2 or 20% to use a fifth of the data set as validation set." 
   86                    "Specify as 100 to use exactly 100 events. (Default: 20%)");
 
   87   DeclareOptionRef(
fUserCodeName = 
"", 
"UserCode", 
"Necessary python code provided by the user to be executed before loading and training the PyTorch Model");
 
  102   Int_t nValidationSamples = 0;
 
  107   if (fNumValidationString.EndsWith(
"%")) {
 
  112         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
 
  113         nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
 
  115         Log() << kFATAL << 
"Cannot parse number \"" << fNumValidationString
 
  116               << 
"\". Expected string like \"20%\" or \"20.0%\"." << 
Endl;
 
  118   } 
else if (fNumValidationString.IsFloat()) {
 
  119      Double_t valSizeAsDouble = fNumValidationString.Atof();
 
  121      if (valSizeAsDouble < 1.0) {
 
  123         nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
 
  126         nValidationSamples = valSizeAsDouble;
 
  129      Log() << kFATAL << 
"Cannot parse number \"" << fNumValidationString << 
"\". Expected string like \"0.2\" or \"100\"." 
  135   if (nValidationSamples < 0) {
 
  136      Log() << kFATAL << 
"Validation size \"" << fNumValidationString << 
"\" is negative." << 
Endl;
 
  139   if (nValidationSamples == 0) {
 
  140      Log() << kFATAL << 
"Validation size \"" << fNumValidationString << 
"\" is zero." << 
Endl;
 
  143   if (nValidationSamples >= (
Int_t)trainingSetSize) {
 
  144      Log() << kFATAL << 
"Validation size \"" << fNumValidationString
 
  145            << 
"\" is larger than or equal in size to training set (size=\"" << trainingSetSize << 
"\")." << 
Endl;
 
  148   return nValidationSamples;
 
  162   Log() << kINFO << 
"Using PyTorch - setting special configuration options "  << 
Endl;
 
  163   PyRunString(
"import torch", 
"Error importing pytorch");
 
  169   PyRunString(
"torch_major_version = int(torch.__version__.split('.')[0])");
 
  170   PyObject *pyTorchVersion = PyDict_GetItemString(
fLocalNS, 
"torch_major_version");
 
  171   int torchVersion = PyLong_AsLong(pyTorchVersion);
 
  172   Log() << kINFO << 
"Using PyTorch version " << torchVersion << 
Endl;
 
  176   if (num_threads > 0) {
 
  177      Log() << kINFO << 
"Setting the CPU number of threads =  "  << num_threads << 
Endl;
 
  195   Log() << kINFO << 
" Setup PyTorch Model for training" << 
Endl;
 
  211   PyRunString(
"print('custom objects for loading model : ',load_model_custom_objects)");
 
  214   PyRunString(
"fit = load_model_custom_objects[\"train_func\"]",
 
  215               "Failed to load train function from file. Please use key: 'train_func' and pass training loop function as the value.");
 
  216   Log() << kINFO << 
"Loaded pytorch train function: " << 
Endl;
 
  220   PyRunString(
"if 'optimizer' in load_model_custom_objects:\n" 
  221               "    optimizer = load_model_custom_objects['optimizer']\n" 
  223               "    optimizer = torch.optim.SGD\n",
 
  224               "Please use key: 'optimizer' and pass a pytorch optimizer as the value for a custom optimizer.");
 
  225   Log() << kINFO << 
"Loaded pytorch optimizer: " << 
Endl;
 
  229   PyRunString(
"criterion = load_model_custom_objects[\"criterion\"]",
 
  230               "Failed to load loss function from file. Using MSE Loss as default. Please use key: 'criterion' and pass a pytorch loss function as the value.");
 
  231   Log() << kINFO << 
"Loaded pytorch loss function: " << 
Endl;
 
  235   PyRunString(
"predict = load_model_custom_objects[\"predict_func\"]",
 
  236               "Can't find user predict function object from file. Please use key: 'predict' and pass a predict function for evaluating the model as the value.");
 
  237   Log() << kINFO << 
"Loaded pytorch predict function: " << 
Endl;
 
  242   if (loadTrainedModel) {
 
  248   PyRunString(
"model = torch.jit.load('"+filenameLoadModel+
"')",
 
  249               "Failed to load PyTorch model from file: "+filenameLoadModel);
 
  250   Log() << kINFO << 
"Loaded model from file: " << filenameLoadModel << 
Endl;
 
  261   else Log() << kFATAL << 
"Selected analysis type is not implemented" << 
Endl;
 
  265   npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)
fNVars};
 
  266   PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (
void*)
fVals);
 
  270   npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)
fNOutputs};
 
  271   PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (
void*)&
fOutput[0]);
 
  284      Log() << kFATAL << 
"Python is not initialized" << 
Endl;
 
  289   PyRunString(
"import sys; sys.argv = ['']", 
"Set sys.argv failed");
 
  290   PyRunString(
"import torch", 
"import PyTorch failed");
 
  294      Log() << kFATAL << 
"import torch in global namespace failed!" << 
Endl;
 
  311   UInt_t nTrainingEvents = nAllEvents - nValEvents;
 
  313   Log() << kINFO << 
"Split TMVA training data in " << nTrainingEvents << 
" training events and " 
  314         << nValEvents << 
" validation events" << 
Endl;
 
  316   float* trainDataX = 
new float[nTrainingEvents*
fNVars];
 
  317   float* trainDataY = 
new float[nTrainingEvents*
fNOutputs];
 
  318   float* trainDataWeights = 
new float[nTrainingEvents];
 
  319   for (
UInt_t i=0; i<nTrainingEvents; i++) {
 
  323         trainDataX[j + i*
fNVars] = 
e->GetValue(j);
 
  336            trainDataY[j + i*
fNOutputs] = 
e->GetTarget(j);
 
  339      else Log() << kFATAL << 
"Can not fill target vector because analysis type is not known" << 
Endl;
 
  342      trainDataWeights[i] = 
e->GetWeight();
 
  345   npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)
fNVars};
 
  346   npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)
fNOutputs};
 
  347   npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};
 
  348   PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (
void*)trainDataX);
 
  349   PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (
void*)trainDataY);
 
  350   PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (
void*)trainDataWeights);
 
  353   PyDict_SetItemString(
fLocalNS, 
"trainWeights", (
PyObject*)pTrainDataWeights);
 
  363   float* valDataX = 
new float[nValEvents*
fNVars];
 
  364   float* valDataY = 
new float[nValEvents*
fNOutputs];
 
  365   float* valDataWeights = 
new float[nValEvents];
 
  367   for (
UInt_t i=0; i< nValEvents ; i++) {
 
  368      UInt_t ievt = nTrainingEvents + i; 
 
  372         valDataX[j + i*
fNVars] = 
e->GetValue(j);
 
  386      else Log() << kFATAL << 
"Can not fill target vector because analysis type is not known" << 
Endl;
 
  388      valDataWeights[i] = 
e->GetWeight();
 
  391   npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)
fNVars};
 
  392   npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)
fNOutputs};
 
  393   npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};
 
  394   PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (
void*)valDataX);
 
  395   PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (
void*)valDataY);
 
  396   PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (
void*)valDataWeights);
 
  404   Log() << kINFO << 
"Print Training Model Architecture" << 
Endl;
 
  411   PyDict_SetItemString(
fLocalNS, 
"batchSize", pBatchSize);
 
  412   PyDict_SetItemString(
fLocalNS, 
"numEpochs", pNumEpochs);
 
  415   PyRunString(
"train_dataset = torch.utils.data.TensorDataset(torch.Tensor(trainX), torch.Tensor(trainY))",
 
  416               "Failed to create pytorch train Dataset.");
 
  418   PyRunString(
"train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batchSize, shuffle=False)",
 
  419               "Failed to create pytorch train Dataloader.");
 
  423   PyRunString(
"val_dataset = torch.utils.data.TensorDataset(torch.Tensor(valX), torch.Tensor(valY))",
 
  424               "Failed to create pytorch validation Dataset.");
 
  426   PyRunString(
"val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batchSize, shuffle=False)",
 
  427               "Failed to create pytorch validation Dataloader.");
 
  434                  "schedulerSteps = {}\n" 
  435                  "for c in strScheduleSteps.split(';'):\n" 
  436                  "    x = c.split(',')\n" 
  437                  "    schedulerSteps[int(x[0])] = float(x[1])\n",
 
  441      PyRunString(
"def schedule(optimizer, epoch, schedulerSteps=schedulerSteps):\n" 
  442                  "    if epoch in schedulerSteps:\n" 
  443                  "        for param_group in optimizer.param_groups:\n" 
  444                  "            param_group['lr'] = float(schedulerSteps[epoch])\n",
 
  451      PyRunString(
"schedule = None; schedulerSteps = None", 
"Failed to set scheduler to None.");
 
  458                  "    if curr_val<=best_val:\n" 
  459                  "        best_val = curr_val\n" 
  460                  "        best_model_jitted = torch.jit.script(model)\n" 
  461                  "        torch.jit.save(best_model_jitted, save_path)\n" 
  463                  "Failed to setup training with option: SaveBestOnly");
 
  464      Log() << kINFO << 
"Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << 
Endl;
 
  467      PyRunString(
"save_best = None", 
"Failed to set save_best to None.");
 
  474   PyRunString(
"trained_model = fit(model, train_loader, val_loader, num_epochs=numEpochs, batch_size=batchSize," 
  475               "optimizer=optimizer, criterion=criterion, save_best=save_best, scheduler=(schedule, schedulerSteps))",
 
  476               "Failed to train model");
 
  487      PyRunString(
"trained_model_jitted = torch.jit.script(trained_model)",
 
  488                  "Model not scriptable. Failed to convert to torch script.");
 
  500   delete[] trainDataWeights;
 
  503   delete[] valDataWeights;
 
  525   PyRunString(
"for i,p in enumerate(predict(model, vals)): output[i]=p\n",
 
  526               "Failed to get predictions");
 
  543   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
 
  544   if (firstEvt < 0) firstEvt = 0;
 
  545   nEvents = lastEvt-firstEvt;
 
  554            << 
" sample (" << nEvents << 
" events)" << 
Endl;
 
  557   for (
UInt_t i=0; i<nEvents; i++) {
 
  565   npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)
fNVars};
 
  566   PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (
void*)
data);
 
  567   if (pDataMvaValues==0) 
Log() << 
"Failed to load data to Python array" << 
Endl;
 
  572   if (pModel==0) 
Log() << kFATAL << 
"Failed to get model Python object" << 
Endl;
 
  575   if (pPredict==0) 
Log() << kFATAL << 
"Failed to get Python predict function" << 
Endl;
 
  579   PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallFunctionObjArgs(pPredict, pModel, pDataMvaValues, NULL);      
 
  580   if (pPredictions==0) 
Log() << kFATAL << 
"Failed to get predictions" << 
Endl;
 
  585   std::vector<double> mvaValues(nEvents);
 
  586   float* predictionsData = (
float*) PyArray_DATA(pPredictions);
 
  587   for (
UInt_t i=0; i<nEvents; i++) {
 
  593            << 
"Elapsed time for evaluation of " << nEvents <<  
" events: " 
  612   PyRunString(
"for i,p in enumerate(predict(model, vals)): output[i]=p\n",
 
  613               "Failed to get predictions");
 
  641   PyRunString(
"for i,p in enumerate(predict(model, vals)): output[i]=p\n",
 
  642               "Failed to get predictions");
 
  654   Log() << 
"PyTorch is a scientific computing package supporting" << 
Endl;
 
  655   Log() << 
"automatic differentiation. This method wraps the training" << 
Endl;
 
  656   Log() << 
"and predictions steps of the PyTorch Python package for" << 
Endl;
 
  657   Log() << 
"TMVA, so that dataloading, preprocessing and evaluation" << 
Endl;
 
  658   Log() << 
"can be done within the TMVA system. To use this PyTorch" << 
Endl;
 
  659   Log() << 
"interface, you need to generatea model with PyTorch first." << 
Endl;
 
  660   Log() << 
"Then, this model can be loaded and trained in TMVA." << 
Endl;
 
#define REGISTER_METHOD(CLASS)
for example
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
 
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
 
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
 
Class that contains all the data information.
 
UInt_t GetNClasses() const
 
UInt_t GetNTargets() const
 
Types::ETreeType GetCurrentType() const
 
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
 
Long64_t GetNTrainingEvents() const
 
void SetCurrentEvent(Long64_t ievt) const
 
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
 
Float_t GetTarget(UInt_t itgt) const
 
PyGILState_STATE m_GILState
 
const char * GetName() const
 
Types::EAnalysisType GetAnalysisType() const
 
const TString & GetWeightFileDir() const
 
const TString & GetMethodName() const
 
const Event * GetEvent() const
 
DataSetInfo & DataInfo() const
 
virtual void TestClassification()
initialization
 
UInt_t GetNVariables() const
 
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
 
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
 
const Event * GetTrainingEvent(Long64_t ievt) const
 
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
 
std::vector< Float_t > & GetMulticlassValues()
 
std::vector< float > fOutput
 
MethodPyTorch(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
 
virtual void TestClassification()
initialization
 
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
get all the MVA values for the events of the current Data type
 
TString fNumValidationString
 
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
 
void GetHelpMessage() const
 
TString fLearningRateSchedule
 
std::vector< Float_t > & GetRegressionValues()
 
TString fFilenameTrainedModel
 
void SetupPyTorchModel(Bool_t loadTrainedModel)
 
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper)
 
static int PyIsInitialized()
Check Python interpreter initialization status.
 
static PyObject * fGlobalNS
 
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=256)
Execute Python code from string.
 
Timing information for training and evaluation of MVA methods.
 
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
 
Singleton class for Global types used by TMVA.
 
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
 
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
 
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
 
create variable transformations
 
MsgLogger & Endl(MsgLogger &ml)