std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
if (it == keyValueMap.end()) {

if (value == "TRUE" || value == "T" || value == "1") {

                  std::vector<double> defaultValue)

if (parseString == "") {

std::vector<double> values;

TIter nextToken(tokenStrings);

for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
   std::stringstream sstr;

   sstr >> currentValue;
   values.push_back(currentValue);
172 " or cross entropy (binary classification).");
184 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
185 "Specify as 100 to use exactly 100 events. (Default: 20%)");
197 "ConvergenceSteps=50,"
201 "Regularization=None,"
208 "ConvergenceSteps=50,"
214 "DropConfig=0.0+0.5+0.5,"
216 "Multithreading=True",
217 "TrainingStrategy",
"Defines the training strategies.");
Log() << kINFO << "Will ignore negative events in training!" << Endl;
Log() << kINFO << "The STANDARD architecture has been deprecated. "
                  "Please use Architecture=CPU or Architecture=GPU. "
                  "See the TMVA Users' Guide for instructions if you "
                  "encounter problems."

Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
                   "Please use Architecture=CPU or Architecture=GPU for the "
                   "time being. See the TMVA Users' Guide for instructions "
                   "if you encounter problems."
#ifndef R__HAS_TMVAGPU
   Log() << kERROR << "CUDA backend not enabled. Please make sure "
                      "you have CUDA installed and that it was successfully "
                      "detected by CMake by using -Dcuda=On "
   Log() << kINFO << "Will now use the CPU architecture!" << Endl;

   Log() << kINFO << "Will now use the Standard architecture!" << Endl;

   Log() << kINFO << "Will now use the GPU architecture!" << Endl;

#ifndef R__HAS_TMVACPU
   Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
                      "you have a BLAS implementation and that it was successfully "
                      "detected by CMake, and that the imt CMake flag is set."

   Log() << kINFO << "Will now use the GPU architecture!" << Endl;

   Log() << kINFO << "Will now use the Standard architecture!" << Endl;

   Log() << kINFO << "Will now use the CPU architecture!" << Endl;

   Log() << kINFO << "Will use the deprecated STANDARD architecture!" << Endl;
   Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
                     << " neural net error function. Setting error function to "
                     << " SUMOFSQUARES now." << Endl;
for (auto &block : strategyKeyValues) {

   if (optimizer == "SGD") {

   } else if (optimizer == "ADAM") {

   } else if (optimizer == "ADAGRAD") {

   } else if (optimizer == "RMSPROP") {

   } else if (optimizer == "ADADELTA") {
TIter nextInputDim(inputDimStrings);

for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {

   depth = (size_t)strDepth.Atoi();

   height = (size_t)strHeight.Atoi();

size_t batchDepth = 0;
size_t batchHeight = 0;
size_t batchWidth = 0;

TIter nextBatchDim(batchDimStrings);

for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {

   batchDepth = (size_t)strDepth.Atoi();

   batchHeight = (size_t)strHeight.Atoi();

   batchWidth = (size_t)strWidth.Atoi();
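// Illustrative sketch (an assumption based on the parsing above, not a verbatim
// excerpt of the TMVA documentation): the layout options are "|"-separated
// depth|height|width triplets, e.g. for 14 flat input variables and a batch size of 128:
//
//    InputLayout=1|1|14      // input depth | input height | input width
//    BatchLayout=1|128|14    // batch depth | batch height | batch width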
template <typename Architecture_t, typename Layer_t>

   const TString layerDelimiter(",");
   const TString subDelimiter("|");

   TIter nextLayer(layerStrings);

   for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {

      TIter nextToken(subStrings);

      if (strLayerType == "DENSE") {

      } else if (strLayerType == "CONV") {

      } else if (strLayerType == "MAXPOOL") {

      } else if (strLayerType == "RESHAPE") {

      } else if (strLayerType == "RNN") {
template <typename Architecture_t, typename Layer_t>

   const size_t inputSize = GetNvar();

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

      if (strActFnc == "DENSE") continue;

      if (strActFnc == "RELU") {

      } else if (strActFnc == "TANH") {

      } else if (strActFnc == "SYMMRELU") {

      } else if (strActFnc == "SOFTSIGN") {

      } else if (strActFnc == "SIGMOID") {

      } else if (strActFnc == "LINEAR") {

      } else if (strActFnc == "GAUSS") {

      } else if (width == 0) {

         TString strNumNodes = strActFnc;
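// Illustrative sketch (an assumption derived from the token handling above): a dense
// layer is described as DENSE|<number of nodes>|<activation>, and several layers are
// chained with "," inside the Layout option, e.g.
//
//    Layout=DENSE|128|TANH,DENSE|128|TANH,DENSE|1|LINEAR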
template <typename Architecture_t, typename Layer_t>

   int zeroPadHeight = 0;
   int zeroPadWidth = 0;

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

      depth = strDepth.Atoi();

      fltHeight = strFltHeight.Atoi();

      fltWidth = strFltWidth.Atoi();

      strideRows = strStrideRows.Atoi();

      strideCols = strStrideCols.Atoi();

      zeroPadHeight = strZeroPadHeight.Atoi();

      zeroPadWidth = strZeroPadWidth.Atoi();

      if (strActFnc == "RELU") {

      } else if (strActFnc == "TANH") {

      } else if (strActFnc == "SYMMRELU") {

      } else if (strActFnc == "SOFTSIGN") {

      } else if (strActFnc == "SIGMOID") {

      } else if (strActFnc == "LINEAR") {

      } else if (strActFnc == "GAUSS") {

                        zeroPadHeight, zeroPadWidth, activationFunction);

   if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                                     zeroPadHeight, zeroPadWidth, activationFunction);
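// Illustrative sketch (an assumption following the parsing order above): a
// convolutional layer token lists depth, filter height/width, stride rows/cols,
// zero-padding height/width and the activation, e.g.
//
//    CONV|32|3|3|1|1|1|1|RELU   // 32 filters of 3x3, stride 1x1, padding 1x1, ReLU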
template <typename Architecture_t, typename Layer_t>

   int filterHeight = 0;

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

      filterHeight = strFrmHeight.Atoi();

      filterWidth = strFrmWidth.Atoi();

      strideRows = strStrideRows.Atoi();

      strideCols = strStrideCols.Atoi();

   deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);

   if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
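// Illustrative sketch (assumption): a max-pooling layer token gives the frame size and
// the strides, e.g.
//
//    MAXPOOL|2|2|1|1   // 2x2 pooling window, stride 1x1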
template <typename Architecture_t, typename Layer_t>

   bool flattening = false;

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      if (token->GetString() == "FLAT") idxToken = 4;

      depth = strDepth.Atoi();

      height = strHeight.Atoi();

      if (flat == "FLAT") {
template <typename Architecture_t, typename Layer_t>

   bool rememberState = false;

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

      stateSize = strstateSize.Atoi();

      inputSize = strinputSize.Atoi();

      timeSteps = strtimeSteps.Atoi();

      rememberState = (bool)strrememberState.Atoi();

                              timeSteps, rememberState);

   if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
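// Illustrative sketch (assumption): a recurrent layer token lists the state size, the
// input size, the number of time steps, and whether the state is remembered between
// batches, e.g.
//
//    RNN|10|14|5|0   // stateSize=10, inputSize=14, timeSteps=5, rememberState=false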
template <typename Architecture_t, typename Layer_t>

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

   : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputDepth(), fInputHeight(), fInputWidth(),
     fBatchDepth(), fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(),
     fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(),
     fWeightInitializationString(), fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()

   : MethodBase(Types::kDL, theData, theWeightFile), fInputDepth(), fInputHeight(), fInputWidth(), fBatchDepth(),
     fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(), fInputLayoutString(),
     fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()

   parseString.ReplaceAll(" ", "");

   const TString keyValueDelim("=");

   TObjArray *blockStrings = parseString.Tokenize(blockDelim);
   TIter nextBlock(blockStrings);

   for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
      blockKeyValues.push_back(std::map<TString, TString>());
      std::map<TString, TString> &currentBlock = blockKeyValues.back();

      TIter nextToken(subStrings);

      for (; token != nullptr; token = (TObjString *)nextToken()) {

         int delimPos = strKeyValue.First(keyValueDelim.Data());
         if (delimPos <= 0) continue;

         currentBlock.insert(std::make_pair(strKey, strValue));

   return blockKeyValues;
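// Illustrative sketch (assumption): with blockDelim = "|" and tokenDelim = ",", a
// training-strategy string such as
//
//    "LearningRate=1e-1,BatchSize=256|LearningRate=1e-2,BatchSize=256"
//
// is parsed into two key-value blocks, {{"LearningRate","1e-1"},{"BatchSize","256"}}
// and {{"LearningRate","1e-2"},{"BatchSize","256"}}, one TTrainingSettings per block.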
   Int_t nValidationSamples = 0;

   if (fNumValidationString.EndsWith("%")) {

      Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
      nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;

      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
            << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;

   } else if (fNumValidationString.IsFloat()) {
      Double_t valSizeAsDouble = fNumValidationString.Atof();

      if (valSizeAsDouble < 1.0) {

         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;

         nValidationSamples = valSizeAsDouble;

      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."

   if (nValidationSamples < 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;

   if (nValidationSamples == 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;

   if (nValidationSamples >= (Int_t)trainingSetSize) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString
            << "\" is larger than or equal in size to the training set (size=\"" << trainingSetSize << "\")." << Endl;

   return nValidationSamples;
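// Illustrative sketch (assumption, matching the branches above): accepted forms of the
// validation-size option (fNumValidationString) and what they resolve to for a
// training set of 10000 events:
//
//    "20%"  -> 2000 events (percentage of the training set)
//    "0.2"  -> 2000 events (a float below 1 is treated as a fraction)
//    "100"  ->  100 events (an absolute number of events)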
template <typename Architecture_t>

   using Scalar_t = typename Architecture_t::Scalar_t;

   const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
   const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};

   size_t trainingPhase = 1;

   size_t nThreads = 1;

   size_t batchSize = settings.batchSize;

      if (batchDepth != batchSize && batchDepth > 1) {
         Error("Train", "Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu", batchDepth, batchSize);

      if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight) {
         Error("Train", "Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu", batchHeight, batchSize);

      bool badLayout = false;

      if (batchDepth == batchSize)
         badLayout = (inputDepth * inputHeight * inputWidth != batchHeight * batchWidth);

      if (batchHeight == batchSize && batchDepth == 1)
         badLayout |= (inputDepth * inputHeight * inputWidth != batchWidth);

         Error("Train", "Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu",
               inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth);
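// Illustrative sketch (assumption): for an input layout of 1 x 1 x 14 and a batch size
// of 128, the checks above accept a batch layout of either 128 x 1 x 14
// (batchDepth == batchSize) or 1 x 128 x 14 (batchHeight == batchSize); any layout
// whose per-event size differs from 1*1*14 is reported as incompatible.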
      if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
         Log() << kFATAL << "Number of samples in the datasets are train: ("
               << nTrainingSamples << ") test: (" << nValidationSamples
               << "). One of these is smaller than the batch size of "
               << settings.batchSize << ". Please decrease the batch"
               << " size to be at most the same size as the smallest"
               << " of them." << Endl;

      DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
      if (trainingPhase == 1) {
         fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,

      std::vector<DeepNet_t> nets{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {

         nets.push_back(deepNet);

      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
      for (auto &p : dropoutVector) {

      deepNet.SetDropoutProbabilities(dropoutVector);
      if (trainingPhase > 1) {

         for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
            const auto &nLayer = fNet->GetLayerAt(i);
            const auto &dLayer = deepNet.GetLayerAt(i);

            Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights());
            Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases());
      int n1 = batchHeight;
      int n2 = batchWidth;

      Log() << "***** Deep Learning Network *****" << Endl;
      if (Log().GetMinType() <= kINFO)

      Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;

      TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, deepNet.GetBatchSize(),
                                      deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
                                      deepNet.GetOutputWidth(), nThreads);

      TensorDataLoader_t validationData(validationTuple, nValidationSamples, deepNet.GetBatchSize(),
                                        deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
                                        deepNet.GetOutputWidth(), nThreads);
      for (auto batch : validationData) {
         auto inputTensor = batch.GetInput();
         auto outputMatrix = batch.GetOutput();
         auto weights = batch.GetWeights();

         minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);

      Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
      minValError /= (Double_t)(nValidationSamples / settings.batchSize);
      minValError += regzTerm;
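// Sketch of the quantity computed above (assuming nValidationSamples is an exact
// multiple of the batch size): the per-batch losses are summed, averaged over the
// number of batches, and the regularization term is added once,
//
//    minValError = (1/nBatches) * sum_b Loss(batch_b) + regzTerm,
//    nBatches    = nValidationSamples / settings.batchSize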
      std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;

      case EOptimizer::kSGD:
         optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(

      case EOptimizer::kAdam:
         optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(

      case EOptimizer::kAdagrad:
         optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(

      case EOptimizer::kRMSProp:
         optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(

      case EOptimizer::kAdadelta:
         optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(

      std::vector<TTensorBatch<Architecture_t>> batches{};

      bool converged = false;
      size_t convergenceCount = 0;
      size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
      std::chrono::time_point<std::chrono::system_clock> tstart, tend;
      tstart = std::chrono::system_clock::now();

            << " Optimizer " << settings.optimizerName
            << " Learning rate = " << settings.learningRate
            << " regularization " << (char)settings.regularization
            << " minimum error = " << minValError

      std::string separator(62, '-');

      Log() << std::setw(10) << "Epoch"
            << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
            << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
            << std::setw(12) << "nEvents/s"
            << std::setw(12) << "Conv. Steps" << Endl;
      size_t shuffleSeed = 0;

      Log() << "Initial Deep Net Weights " << Endl;
      auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
      for (size_t l = 0; l < weights_tensor.size(); ++l)
         weights_tensor[l].Print();
      auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
      bias_tensor[0].Print();
      while (!converged) {
         optimizer->IncrementGlobalStep();
         trainingData.Shuffle(rng);

         for (size_t i = 0; i < batchesInEpoch; ++i) {

            auto my_batch = trainingData.GetTensorBatch();

            deepNet.Forward(my_batch.GetInput(), true);
            deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
         if ((optimizer->GetGlobalStep() % settings.testInterval) == 0) {

            std::chrono::time_point<std::chrono::system_clock> t1, t2;

            t1 = std::chrono::system_clock::now();

            for (auto batch : validationData) {
               auto inputTensor = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weights = batch.GetWeights();

               valError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);

            Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
            valError /= (Double_t)(nValidationSamples / settings.batchSize);
            valError += regTerm;

            t2 = std::chrono::system_clock::now();

            if (valError < minValError) {
               convergenceCount = 0;

               convergenceCount += settings.testInterval;
            if (valError < minValError) {

               Log() << std::setw(10) << optimizer->GetGlobalStep()
                     << " Minimum Test error found - save the configuration " << Endl;
               for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
                  const auto &nLayer = fNet->GetLayerAt(i);
                  const auto &dLayer = deepNet.GetLayerAt(i);

               minValError = valError;

            } else if (minValError <= 0.)
               minValError = valError;
            for (auto batch : trainingData) {
               auto inputTensor = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weights = batch.GetWeights();
               trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);

            trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
            trainingError += regTerm;
            tend = std::chrono::system_clock::now();

            std::chrono::duration<double> elapsed_seconds = tend - tstart;
            std::chrono::duration<double> elapsed1 = t1 - tstart;

            std::chrono::duration<double> elapsed_testing = tend - t1;

            double seconds = elapsed_seconds.count();

            double eventTime = elapsed1.count() / (batchesInEpoch * settings.testInterval * settings.batchSize);

               convergenceCount > settings.convergenceSteps || optimizer->GetGlobalStep() >= settings.maxEpochs;

            Log() << std::setw(10) << optimizer->GetGlobalStep() << " | "
                  << std::setw(12) << trainingError
                  << std::setw(12) << valError
                  << std::setw(12) << seconds / settings.testInterval
                  << std::setw(12) << elapsed_testing.count()
                  << std::setw(12) << 1. / eventTime
                  << std::setw(12) << convergenceCount

            tstart = std::chrono::system_clock::now();
         if (converged && debug) {
            Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << optimizer->GetGlobalStep()

            auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
            auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
            for (size_t l = 0; l < weights_tensor.size(); ++l)
               weights_tensor[l].Print();
            bias_tensor[0].Print();
   Log() << kFATAL << "Not implemented yet" << Endl;

#ifdef R__HAS_TMVAGPU
   Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
   TrainDeepNet<DNN::TCuda<ScalarImpl_t> >();

   Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                      "you have CUDA installed and that it was successfully "
                      "detected by CMake."

   Log() << kFATAL << "OPENCL backend not yet supported." << Endl;

#ifdef R__HAS_TMVACPU

   Log() << kINFO << "Start of deep neural network training on CPU using (for ROOT-IMT) nthreads = "

   TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();

   Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                      "you have a BLAS implementation and that it was successfully "
                      "detected by CMake, and that the imt CMake flag is set."

   Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
   TrainDeepNet<DNN::TReference<ScalarImpl_t> >();

         " is not a supported architecture for TMVA::MethodDL"
   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built"

   int n1 = fXInput[0].GetNrows();
   int n2 = fXInput[0].GetNcols();

   if (n1 * n2 != nVariables) {
      Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
            << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2

   for (int j = 0; j < n1; ++j) {
      for (int k = 0; k < n2; k++) {
         fXInput[0](j, k) = inputValues[j * n2 + k];
   double mvaValue = (*fYHat)(0, 0);

#ifdef DEBUG_MVAVALUE
   using Tensor_t = std::vector<MatrixImpl_t>;
   TMatrixF xInput(n1, n2, inputValues.data());

   std::cout << "Output of DeepNet " << mvaValue << std::endl;
   auto &deepnet = *fNet;
   std::cout << "Loop on layers " << std::endl;
   for (int l = 0; l < deepnet.GetDepth(); ++l) {
      std::cout << "Layer " << l;
      const auto *layer = deepnet.GetLayerAt(l);
      const Tensor_t &layer_output = layer->GetOutput();

      std::cout << "DNN output " << layer_output.size() << std::endl;
      for (size_t i = 0; i < layer_output.size(); ++i) {
#ifdef R__HAS_TMVAGPU

         TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols(), layer_output[i].GetRawDataPointer());

      const Tensor_t &layer_weights = layer->GetWeights();
      std::cout << "DNN weights " << layer_weights.size() << std::endl;
      if (layer_weights.size() > 0) {

#ifdef R__HAS_TMVAGPU

         TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols(), layer_weights[i].GetRawDataPointer());
template <typename Architecture_t>

   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built"

   using Matrix_t = typename Architecture_t::Matrix_t;

   DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
   std::vector<DeepNet_t> nets{};

   for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
      const auto &nLayer = fNet->GetLayerAt(i);
      const auto &dLayer = deepNet.GetLayerAt(i);
      Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights());
      Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases());

   size_t n1 = deepNet.GetBatchHeight();
   size_t n2 = deepNet.GetBatchWidth();
   size_t n0 = deepNet.GetBatchSize();

   n1 = deepNet.GetBatchSize();

   Long64_t nEvents = lastEvt - firstEvt;

   TensorDataLoader_t testData(testTuple, nEvents, batchSize, n0, n1, n2, deepNet.GetOutputWidth(), 1);
   Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth());

         << " sample (" << nEvents << " events)" << Endl;

   std::vector<double> mvaValues(nEvents);

   for (Long64_t ievt = firstEvt; ievt < lastEvt; ievt += batchSize) {

      Long64_t ievt_end = ievt + batchSize;

      if (ievt_end <= lastEvt) {

         if (ievt == firstEvt) {

            if (n1 == batchSize && n0 == 1) {
               if (n2 != nVariables) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2

               if (n1 * n2 != nVariables || n0 != batchSize) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2

         auto batch = testData.GetTensorBatch();
         auto inputTensor = batch.GetInput();

         auto xInput = batch.GetInput();

         for (size_t i = 0; i < batchSize; ++i) {
            double value = yHat(i, 0);
            mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;

      for (Long64_t i = ievt; i < lastEvt; ++i) {

         << "Elapsed time for evaluation of " << nEvents << " events: "
   size_t nVariables = GetEvent()->GetNVariables();

   std::vector<MatrixImpl_t> X_vec;
   const Event *ev = GetEvent();
   const std::vector<Float_t> &inputValues = ev->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];

   X_vec.emplace_back(X);
   size_t nTargets = std::max(1u, ev->GetNTargets());

   std::vector<Float_t> output(nTargets);
   fNet->Prediction(YHat, X_vec, fOutputFunction);

   for (size_t i = 0; i < nTargets; i++)

   if (fRegressionReturnVal == NULL) {
      fRegressionReturnVal = new std::vector<Float_t>();

   fRegressionReturnVal->clear();

   for (size_t i = 0; i < nTargets; ++i) {

   const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
   for (size_t i = 0; i < nTargets; ++i) {
      fRegressionReturnVal->push_back(evT2->GetTarget(i));

   return *fRegressionReturnVal;
   size_t nVariables = GetEvent()->GetNVariables();

   std::vector<MatrixImpl_t> X_vec;

   if (fMulticlassReturnVal == NULL) {
      fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());

   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];

   X_vec.emplace_back(X);
   fNet->Prediction(YHat, X_vec, fOutputFunction);
   for (size_t i = 0; i < (size_t)YHat.GetNcols(); i++) {
      (*fMulticlassReturnVal)[i] = YHat(0, i);

   return *fMulticlassReturnVal;
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   if (size_t(nEvents) < batchSize) batchSize = nEvents;

#ifdef R__HAS_TMVAGPU
   Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
   return PredictDeepNet<DNN::TCuda<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);

#ifdef R__HAS_TMVACPU
   Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
   return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);

   Log() << kINFO << "Evaluate deep neural network on the STANDARD architecture using batches with size = " << batchSize

   return PredictDeepNet<DNN::TReference<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
   void *nn = xmlEngine.NewChild(parent, 0, "Weights");

   Int_t inputDepth = fNet->GetInputDepth();
   Int_t inputHeight = fNet->GetInputHeight();
   Int_t inputWidth = fNet->GetInputWidth();

   Int_t batchDepth = fNet->GetBatchDepth();
   Int_t batchHeight = fNet->GetBatchHeight();
   Int_t batchWidth = fNet->GetBatchWidth();

   char lossFunction = static_cast<char>(fNet->GetLossFunction());
   char initialization = static_cast<char>(fNet->GetInitialization());

   xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
   xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));

   xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));

   for (Int_t i = 0; i < depth; i++)
   size_t inputDepth, inputHeight, inputWidth;

   size_t batchSize, batchDepth, batchHeight, batchWidth;

   char lossFunctionChar;
   char initializationChar;
   char regularizationChar;
   char outputFunctionChar;

   fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
                                                           batchHeight, batchWidth,

   for (size_t i = 0; i < netDepth; i++) {

      if (layerName == "DenseLayer") {

      else if (layerName == "ConvLayer") {

         size_t fltHeight, fltWidth = 0;
         size_t strideRows, strideCols = 0;
         size_t padHeight, padWidth = 0;

         fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                            padHeight, padWidth, actFunction);

      else if (layerName == "MaxPoolLayer") {

         size_t filterHeight, filterWidth = 0;
         size_t strideRows, strideCols = 0;

         fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);

      else if (layerName == "ReshapeLayer") {

         size_t depth, height, width = 0;

         fNet->AddReshapeLayer(depth, height, width, flattening);

      else if (layerName == "RNNLayer") {

         size_t stateSize, inputSize, timeSteps = 0;
         int rememberState = 0;

         fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);

      fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);

   int n1 = batchHeight;
   int n2 = batchWidth;