Logo ROOT  
Reference Guide
MethodDL.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/cnn:$Id$Ndl
2 // Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3 /**********************************************************************************
4  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5  * Package: TMVA *
6  * Class : MethodDL *
7  * Web : http://tmva.sourceforge.net *
8  * *
9  * Description: *
10  * Deep Neural Network Method *
11  * *
12  * Authors (alphabetical): *
13  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14  * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15  * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 #include "TFormula.h"
29 #include "TString.h"
30 #include "TMath.h"
31 #include "TObjString.h"
32 
33 #include "TMVA/Tools.h"
34 #include "TMVA/Configurable.h"
35 #include "TMVA/IMethod.h"
36 #include "TMVA/ClassifierFactory.h"
37 #include "TMVA/MethodDL.h"
38 #include "TMVA/Types.h"
40 #include "TMVA/DNN/Functions.h"
41 #include "TMVA/DNN/DLMinimizers.h"
42 #include "TMVA/DNN/SGD.h"
43 #include "TMVA/DNN/Adam.h"
44 #include "TMVA/DNN/Adagrad.h"
45 #include "TMVA/DNN/RMSProp.h"
46 #include "TMVA/DNN/Adadelta.h"
47 #include "TMVA/Timer.h"
48 
49 #ifdef R__HAS_TMVAGPU
51 #ifdef R__HAS_CUDNN
53 #endif
54 #endif
55 
56 #include <chrono>
57 
60 
61 using namespace TMVA::DNN::CNN;
62 using namespace TMVA::DNN;
63 
69 
70 
71 namespace TMVA {
72 
73 
74 ////////////////////////////////////////////////////////////////////////////////
75 TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
76 {
77  key.ToUpper();
78  std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
79  if (it == keyValueMap.end()) {
80  return TString("");
81  }
82  return it->second;
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 template <typename T>
87 T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
88 
89 ////////////////////////////////////////////////////////////////////////////////
90 template <>
91 int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
92 {
93  TString value(fetchValueTmp(keyValueMap, key));
94  if (value == "") {
95  return defaultValue;
96  }
97  return value.Atoi();
98 }
99 
100 ////////////////////////////////////////////////////////////////////////////////
101 template <>
102 double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
103 {
104  TString value(fetchValueTmp(keyValueMap, key));
105  if (value == "") {
106  return defaultValue;
107  }
108  return value.Atof();
109 }
110 
111 ////////////////////////////////////////////////////////////////////////////////
112 template <>
113 TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
114 {
115  TString value(fetchValueTmp(keyValueMap, key));
116  if (value == "") {
117  return defaultValue;
118  }
119  return value;
120 }
121 
122 ////////////////////////////////////////////////////////////////////////////////
123 template <>
124 bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
125 {
126  TString value(fetchValueTmp(keyValueMap, key));
127  if (value == "") {
128  return defaultValue;
129  }
130 
131  value.ToUpper();
132  if (value == "TRUE" || value == "T" || value == "1") {
133  return true;
134  }
135 
136  return false;
137 }
138 
139 ////////////////////////////////////////////////////////////////////////////////
140 template <>
141 std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
142  std::vector<double> defaultValue)
143 {
144  TString parseString(fetchValueTmp(keyValueMap, key));
145  if (parseString == "") {
146  return defaultValue;
147  }
148 
149  parseString.ToUpper();
150  std::vector<double> values;
151 
152  const TString tokenDelim("+");
153  TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
154  TIter nextToken(tokenStrings);
155  TObjString *tokenString = (TObjString *)nextToken();
156  for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
157  std::stringstream sstr;
158  double currentValue;
159  sstr << tokenString->GetString().Data();
160  sstr >> currentValue;
161  values.push_back(currentValue);
162  }
163  return values;
164 }
165 
166 ////////////////////////////////////////////////////////////////////////////////
168 {
169  // Set default values for all option strings
170 
171  DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
172 
173  DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
174 
175  DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
176 
177  DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
178  " or cross entropy (binary classification).");
179  AddPreDefVal(TString("CROSSENTROPY"));
180  AddPreDefVal(TString("SUMOFSQUARES"));
181  AddPreDefVal(TString("MUTUALEXCLUSIVE"));
182 
183  DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
184  AddPreDefVal(TString("XAVIER"));
185  AddPreDefVal(TString("XAVIERUNIFORM"));
186  AddPreDefVal(TString("GAUSS"));
187  AddPreDefVal(TString("UNIFORM"));
188  AddPreDefVal(TString("IDENTITY"));
189  AddPreDefVal(TString("ZERO"));
190 
191  DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
192 
193  DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
194  "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
195  "Specify as 100 to use exactly 100 events. (Default: 20%)");
196 
197  DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
198  AddPreDefVal(TString("STANDARD")); // deprecated and not supported anymore
199  AddPreDefVal(TString("CPU"));
200  AddPreDefVal(TString("GPU"));
201  AddPreDefVal(TString("OPENCL")); // not yet implemented
202  AddPreDefVal(TString("CUDNN")); // not needed (by default GPU is now CUDNN if available)
203 
204  // define training strategy separated by a separator "|"
205  DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-3,"
206  "Momentum=0.0,"
207  "ConvergenceSteps=100,"
208  "MaxEpochs=2000,"
209  "Optimizer=ADAM,"
210  "BatchSize=30,"
211  "TestRepetitions=1,"
212  "WeightDecay=0.0,"
213  "Regularization=None,"
214  "DropConfig=0.0",
215  "TrainingStrategy", "Defines the training strategies.");
216 }
217 
218 ////////////////////////////////////////////////////////////////////////////////
220 {
221 
222  if (IgnoreEventsWithNegWeightsInTraining()) {
223  Log() << kINFO << "Will ignore negative events in training!" << Endl;
224  }
225 
226  if (fArchitectureString == "STANDARD") {
227  Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
228  "Please use Architecture=CPU or Architecture=CPU."
229  "See the TMVA Users' Guide for instructions if you "
230  "encounter problems."
231  << Endl;
232  Log() << kINFO << "We will use instead the CPU architecture" << Endl;
233  fArchitectureString = "CPU";
234  }
235  if (fArchitectureString == "OPENCL") {
236  Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
237  "Please use Architecture=CPU or Architecture=CPU for the "
238  "time being. See the TMVA Users' Guide for instructions "
239  "if you encounter problems."
240  << Endl;
241  // use instead GPU
242  Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
243  fArchitectureString = "GPU";
244  }
245 
246  // the architecture can now be set at runtime as an option
247 
248 
249  if (fArchitectureString == "GPU" || fArchitectureString == "CUDNN") {
250 #ifdef R__HAS_TMVAGPU
251  Log() << kINFO << "Will now use the GPU architecture !" << Endl;
252 #else // case TMVA does not support GPU
253  Log() << kERROR << "CUDA backend not enabled. Please make sure "
254  "you have CUDA installed and it was successfully "
255  "detected by CMAKE by using -Dtmva-gpu=On "
256  << Endl;
257  fArchitectureString = "CPU";
258  Log() << kINFO << "Will now use instead the CPU architecture !" << Endl;
259 #endif
260  }
261 
262  if (fArchitectureString == "CPU") {
263 #ifdef R__HAS_TMVACPU // TMVA has CPU BLAS and IMT support
264  Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support !" << Endl;
265 #else // TMVA has no CPU BLAS or IMT support
266  Log() << kINFO << "Multi-core CPU backend not enabled. For better performances, make sure "
267  "you have a BLAS implementation and it was successfully "
268  "detected by CMake as well that the imt CMake flag is set."
269  << Endl;
270  Log() << kINFO << "Will use anyway the CPU architecture but with slower performance" << Endl;
271 #endif
272  }
273 
274  // Input Layout
275  ParseInputLayout();
276  ParseBatchLayout();
277 
278  // Loss function and output.
279  fOutputFunction = EOutputFunction::kSigmoid;
280  if (fAnalysisType == Types::kClassification) {
281  if (fErrorStrategy == "SUMOFSQUARES") {
282  fLossFunction = ELossFunction::kMeanSquaredError;
283  }
284  if (fErrorStrategy == "CROSSENTROPY") {
285  fLossFunction = ELossFunction::kCrossEntropy;
286  }
287  fOutputFunction = EOutputFunction::kSigmoid;
288  } else if (fAnalysisType == Types::kRegression) {
289  if (fErrorStrategy != "SUMOFSQUARES") {
290  Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
291  << " neural net error function. Setting error function to "
292  << " SUMOFSQUARES now." << Endl;
293  }
294 
295  fLossFunction = ELossFunction::kMeanSquaredError;
296  fOutputFunction = EOutputFunction::kIdentity;
297  } else if (fAnalysisType == Types::kMulticlass) {
298  if (fErrorStrategy == "SUMOFSQUARES") {
299  fLossFunction = ELossFunction::kMeanSquaredError;
300  }
301  if (fErrorStrategy == "CROSSENTROPY") {
302  fLossFunction = ELossFunction::kCrossEntropy;
303  }
304  if (fErrorStrategy == "MUTUALEXCLUSIVE") {
305  fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
306  }
307  fOutputFunction = EOutputFunction::kSoftmax;
308  }
309 
310  // Initialization
311  // the biases will be always initialized to zero
312  if (fWeightInitializationString == "XAVIER") {
313  fWeightInitialization = DNN::EInitialization::kGlorotNormal;
314  } else if (fWeightInitializationString == "XAVIERUNIFORM") {
315  fWeightInitialization = DNN::EInitialization::kGlorotUniform;
316  } else if (fWeightInitializationString == "GAUSS") {
317  fWeightInitialization = DNN::EInitialization::kGauss;
318  } else if (fWeightInitializationString == "UNIFORM") {
319  fWeightInitialization = DNN::EInitialization::kUniform;
320  } else if (fWeightInitializationString == "ZERO") {
321  fWeightInitialization = DNN::EInitialization::kZero;
322  } else if (fWeightInitializationString == "IDENTITY") {
323  fWeightInitialization = DNN::EInitialization::kIdentity;
324  } else {
325  fWeightInitialization = DNN::EInitialization::kGlorotUniform;
326  }
327 
328  // Training settings.
329 
330  KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString, TString("|"), TString(","));
331  for (auto &block : strategyKeyValues) {
332  TTrainingSettings settings;
333 
334  settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
335  settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
336  settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
337  settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
338  settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
339  settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
340  settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
341  settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
342 
343  TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
344  if (regularization == "L1") {
346  } else if (regularization == "L2") {
348  } else {
350  }
351 
352  TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
353  settings.optimizerName = optimizer;
354  if (optimizer == "SGD") {
355  settings.optimizer = DNN::EOptimizer::kSGD;
356  } else if (optimizer == "ADAM") {
358  } else if (optimizer == "ADAGRAD") {
360  } else if (optimizer == "RMSPROP") {
362  } else if (optimizer == "ADADELTA") {
364  } else {
365  // Make Adam as default choice if the input string is
366  // incorrect.
368  settings.optimizerName = "ADAM";
369  }
370  // check for specific optimizer parameters
371  std::vector<TString> optimParamLabels = {"_beta1", "_beta2", "_eps", "_rho"};
372  //default values
373  std::map<TString, double> defaultValues = {
374  {"ADADELTA_eps", 1.E-8}, {"ADADELTA_rho", 0.95},
375  {"ADAGRAD_eps", 1.E-8},
376  {"ADAM_beta1", 0.9}, {"ADAM_beta2", 0.999}, {"ADAM_eps", 1.E-7},
377  {"RMSPROP_eps", 1.E-7}, {"RMSPROP_rho", 0.9},
378  };
379  for (auto &pN : optimParamLabels) {
380  TString optimParamName = settings.optimizerName + pN;
381  // check if optimizer has default values for this specific parameters
382  if (defaultValues.count(optimParamName) > 0) {
383  double defValue = defaultValues[optimParamName];
384  double val = fetchValueTmp(block, optimParamName, defValue);
385  // create entry in settings for this optimizer parameter
386  settings.optimizerParams[optimParamName] = val;
387  }
388  }
389 
390  fTrainingSettings.push_back(settings);
391  }
392 
393  // this set fInputShape[0] = batchSize
394  this->SetBatchSize(fTrainingSettings.front().batchSize);
395 
396  // case inputlayout and batch layout was not given. Use default then
397  // (1, batchsize, nvariables)
398  // fInputShape[0] -> BatchSize
399  // fInputShape[1] -> InputDepth
400  // fInputShape[2] -> InputHeight
401  // fInputShape[3] -> InputWidth
402  if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
403  fInputShape[1] = 1;
404  fInputShape[2] = 1;
405  fInputShape[3] = GetNVariables();
406  }
407  // case when batch layout is not provided (all zero)
408  // batch layout can be determined by the input layout + batch size
409  // case DNN : { 1, B, W }
410  // case CNN : { B, C, H*W}
411  // case RNN : { B, T, H*W }
412 
413  if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
414  // case first layer is DENSE
415  if (fInputShape[2] == 1 && fInputShape[1] == 1) {
416  // case of (1, batchsize, input features)
417  fBatchDepth = 1;
418  fBatchHeight = fTrainingSettings.front().batchSize;
419  fBatchWidth = fInputShape[3];
420  }
421  else { // more general cases (e.g. for CNN)
422  // case CONV or RNN
423  fBatchDepth = fTrainingSettings.front().batchSize;
424  fBatchHeight = fInputShape[1];
425  fBatchWidth = fInputShape[3]*fInputShape[2];
426  }
427  }
428 }
429 
430 ////////////////////////////////////////////////////////////////////////////////
431 /// default initializations
433 {
434  // Nothing to do here
435 }
436 
437 ////////////////////////////////////////////////////////////////////////////////
438 /// Parse the input layout
440 {
441  // Define the delimiter
442  const TString delim("|");
443 
444  // Get the input layout string
445  TString inputLayoutString = this->GetInputLayoutString();
446 
447  // Split the input layout string
448  TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
449  TIter nextInputDim(inputDimStrings);
450  TObjString *inputDimString = (TObjString *)nextInputDim();
451 
452  // Go through every token and save its absolute value in the shape array
453  // The first token is the batch size for easy compatibility with cudnn
454  int subDim = 1;
455  std::vector<size_t> inputShape;
456  inputShape.reserve(inputLayoutString.Length()/2 + 2);
457  inputShape.push_back(0); // Will be set later by Trainingsettings, use 0 value now
458  for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
459  // size_t is unsigned
460  subDim = (size_t) abs(inputDimString->GetString().Atoi());
461  // Size among unused dimensions should be set to 1 for cudnn
462  //if (subDim == 0) subDim = 1;
463  inputShape.push_back(subDim);
464  }
465  // it is expected that empty Shape has at least 4 dimensions. We pad the missing one's with 1
466  // for example in case of dense layer input layouts
467  // when we will support 3D convolutions we would need to add extra 1's
468  if (inputShape.size() == 2) {
469  // case of dense layer where only width is specified
470  inputShape.insert(inputShape.begin() + 1, {1,1});
471  }
472  else if (inputShape.size() == 3) {
473  //e.g. case of RNN T,W -> T,1,W
474  inputShape.insert(inputShape.begin() + 2, 1);
475  }
476 
477  this->SetInputShape(inputShape);
478 }
479 
480 ////////////////////////////////////////////////////////////////////////////////
481 /// Parse the input layout
483 {
484  // Define the delimiter
485  const TString delim("|");
486 
487  // Get the input layout string
488  TString batchLayoutString = this->GetBatchLayoutString();
489 
490  size_t batchDepth = 0;
491  size_t batchHeight = 0;
492  size_t batchWidth = 0;
493 
494  // Split the input layout string
495  TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
496  TIter nextBatchDim(batchDimStrings);
497  TObjString *batchDimString = (TObjString *)nextBatchDim();
498  int idxToken = 0;
499 
500  for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
501  switch (idxToken) {
502  case 0: // input depth
503  {
504  TString strDepth(batchDimString->GetString());
505  batchDepth = (size_t)strDepth.Atoi();
506  } break;
507  case 1: // input height
508  {
509  TString strHeight(batchDimString->GetString());
510  batchHeight = (size_t)strHeight.Atoi();
511  } break;
512  case 2: // input width
513  {
514  TString strWidth(batchDimString->GetString());
515  batchWidth = (size_t)strWidth.Atoi();
516  } break;
517  }
518  ++idxToken;
519  }
520 
521  this->SetBatchDepth(batchDepth);
522  this->SetBatchHeight(batchHeight);
523  this->SetBatchWidth(batchWidth);
524 }
525 
526 ////////////////////////////////////////////////////////////////////////////////
527 /// Create a deep net based on the layout string
528 template <typename Architecture_t, typename Layer_t>
530  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets)
531 {
532  // Layer specification, layer details
533  const TString layerDelimiter(",");
534  const TString subDelimiter("|");
535 
536  TString layoutString = this->GetLayoutString();
537 
538  //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
539 
540  // Split layers
541  TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
542  TIter nextLayer(layerStrings);
543  TObjString *layerString = (TObjString *)nextLayer();
544 
545 
546  for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
547 
548  // Split layer details
549  TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
550  TIter nextToken(subStrings);
551  TObjString *token = (TObjString *)nextToken();
552 
553  // Determine the type of the layer
554  TString strLayerType = token->GetString();
555 
556 
557  if (strLayerType == "DENSE") {
558  ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
559  } else if (strLayerType == "CONV") {
560  ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
561  } else if (strLayerType == "MAXPOOL") {
562  ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
563  } else if (strLayerType == "RESHAPE") {
564  ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
565  } else if (strLayerType == "BNORM") {
566  ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
567  } else if (strLayerType == "RNN") {
568  ParseRecurrentLayer(kLayerRNN, deepNet, nets, layerString->GetString(), subDelimiter);
569  } else if (strLayerType == "LSTM") {
570  ParseRecurrentLayer(kLayerLSTM, deepNet, nets, layerString->GetString(), subDelimiter);
571  } else if (strLayerType == "GRU") {
572  ParseRecurrentLayer(kLayerGRU, deepNet, nets, layerString->GetString(), subDelimiter);
573  } else {
574  // no type of layer specified - assume is dense layer as in old DNN interface
575  ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
576  }
577  }
578 }
579 
580 ////////////////////////////////////////////////////////////////////////////////
581 /// Pases the layer string and creates the appropriate dense layer
582 template <typename Architecture_t, typename Layer_t>
584  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
585  TString delim)
586 {
587  int width = 0;
588  EActivationFunction activationFunction = EActivationFunction::kTanh;
589 
590  // this return number of input variables for the method
591  // it can be used to deduce width of dense layer if specified as N+10
592  // where N is the number of input variables
593  const size_t inputSize = GetNvar();
594 
595  // Split layer details
596  TObjArray *subStrings = layerString.Tokenize(delim);
597  TIter nextToken(subStrings);
598  TObjString *token = (TObjString *)nextToken();
599  int idxToken = 0;
600 
601  // loop on the tokens
602  // order of sepcifying width and activation function is not relevant
603  // both 100|TANH and TANH|100 are valid cases
604  for (; token != nullptr; token = (TObjString *)nextToken()) {
605  idxToken++;
606  // try a match with the activation function
607  TString strActFnc(token->GetString());
608  // if first token defines the layer type- skip it
609  if (strActFnc =="DENSE") continue;
610 
611  if (strActFnc == "RELU") {
612  activationFunction = DNN::EActivationFunction::kRelu;
613  } else if (strActFnc == "TANH") {
614  activationFunction = DNN::EActivationFunction::kTanh;
615  } else if (strActFnc == "FTANH") {
616  activationFunction = DNN::EActivationFunction::kFastTanh;
617  } else if (strActFnc == "SYMMRELU") {
618  activationFunction = DNN::EActivationFunction::kSymmRelu;
619  } else if (strActFnc == "SOFTSIGN") {
620  activationFunction = DNN::EActivationFunction::kSoftSign;
621  } else if (strActFnc == "SIGMOID") {
622  activationFunction = DNN::EActivationFunction::kSigmoid;
623  } else if (strActFnc == "LINEAR") {
624  activationFunction = DNN::EActivationFunction::kIdentity;
625  } else if (strActFnc == "GAUSS") {
626  activationFunction = DNN::EActivationFunction::kGauss;
627  } else if (width == 0) {
628  // no match found try to parse as text showing the width
629  // support for input a formula where the variable 'x' is 'N' in the string
630  // use TFormula for the evaluation
631  TString strNumNodes = strActFnc;
632  // number of nodes
633  TString strN("x");
634  strNumNodes.ReplaceAll("N", strN);
635  strNumNodes.ReplaceAll("n", strN);
636  TFormula fml("tmp", strNumNodes);
637  width = fml.Eval(inputSize);
638  }
639  }
640  // avoid zero width. assume is last layer and give width = output width
641  // Determine the number of outputs
642  size_t outputSize = 1;
643  if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
644  outputSize = GetNTargets();
645  } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
646  outputSize = DataInfo().GetNClasses();
647  }
648  if (width == 0) width = outputSize;
649 
650  // Add the dense layer, initialize the weights and biases and copy
651  TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
652  denseLayer->Initialize();
653 
654  // add same layer to fNet
655  if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
656 
657  //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
658 
659  // add the copy to all slave nets
660  //for (size_t i = 0; i < nets.size(); i++) {
661  // nets[i].AddDenseLayer(copyDenseLayer);
662  //}
663 
664  // check compatibility of added layer
665  // for a dense layer input should be 1 x 1 x DxHxW
666 }
667 
668 ////////////////////////////////////////////////////////////////////////////////
669 /// Pases the layer string and creates the appropriate convolutional layer
670 template <typename Architecture_t, typename Layer_t>
672  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
673  TString delim)
674 {
675  int depth = 0;
676  int fltHeight = 0;
677  int fltWidth = 0;
678  int strideRows = 0;
679  int strideCols = 0;
680  int zeroPadHeight = 0;
681  int zeroPadWidth = 0;
682  EActivationFunction activationFunction = EActivationFunction::kTanh;
683 
684  // Split layer details
685  TObjArray *subStrings = layerString.Tokenize(delim);
686  TIter nextToken(subStrings);
687  TObjString *token = (TObjString *)nextToken();
688  int idxToken = 0;
689 
690  for (; token != nullptr; token = (TObjString *)nextToken()) {
691  switch (idxToken) {
692  case 1: // depth
693  {
694  TString strDepth(token->GetString());
695  depth = strDepth.Atoi();
696  } break;
697  case 2: // filter height
698  {
699  TString strFltHeight(token->GetString());
700  fltHeight = strFltHeight.Atoi();
701  } break;
702  case 3: // filter width
703  {
704  TString strFltWidth(token->GetString());
705  fltWidth = strFltWidth.Atoi();
706  } break;
707  case 4: // stride in rows
708  {
709  TString strStrideRows(token->GetString());
710  strideRows = strStrideRows.Atoi();
711  } break;
712  case 5: // stride in cols
713  {
714  TString strStrideCols(token->GetString());
715  strideCols = strStrideCols.Atoi();
716  } break;
717  case 6: // zero padding height
718  {
719  TString strZeroPadHeight(token->GetString());
720  zeroPadHeight = strZeroPadHeight.Atoi();
721  } break;
722  case 7: // zero padding width
723  {
724  TString strZeroPadWidth(token->GetString());
725  zeroPadWidth = strZeroPadWidth.Atoi();
726  } break;
727  case 8: // activation function
728  {
729  TString strActFnc(token->GetString());
730  if (strActFnc == "RELU") {
731  activationFunction = DNN::EActivationFunction::kRelu;
732  } else if (strActFnc == "TANH") {
733  activationFunction = DNN::EActivationFunction::kTanh;
734  } else if (strActFnc == "SYMMRELU") {
735  activationFunction = DNN::EActivationFunction::kSymmRelu;
736  } else if (strActFnc == "SOFTSIGN") {
737  activationFunction = DNN::EActivationFunction::kSoftSign;
738  } else if (strActFnc == "SIGMOID") {
739  activationFunction = DNN::EActivationFunction::kSigmoid;
740  } else if (strActFnc == "LINEAR") {
741  activationFunction = DNN::EActivationFunction::kIdentity;
742  } else if (strActFnc == "GAUSS") {
743  activationFunction = DNN::EActivationFunction::kGauss;
744  }
745  } break;
746  }
747  ++idxToken;
748  }
749 
750  // Add the convolutional layer, initialize the weights and biases and copy
751  TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
752  zeroPadHeight, zeroPadWidth, activationFunction);
753  convLayer->Initialize();
754 
755  // Add same layer to fNet
756  if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
757  zeroPadHeight, zeroPadWidth, activationFunction);
758 
759  //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
760 
761  //// add the copy to all slave nets
762  //for (size_t i = 0; i < nets.size(); i++) {
763  // nets[i].AddConvLayer(copyConvLayer);
764  //}
765 }
766 
767 ////////////////////////////////////////////////////////////////////////////////
768 /// Pases the layer string and creates the appropriate max pool layer
769 template <typename Architecture_t, typename Layer_t>
771  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
772  TString delim)
773 {
774 
775  int filterHeight = 0;
776  int filterWidth = 0;
777  int strideRows = 0;
778  int strideCols = 0;
779 
780  // Split layer details
781  TObjArray *subStrings = layerString.Tokenize(delim);
782  TIter nextToken(subStrings);
783  TObjString *token = (TObjString *)nextToken();
784  int idxToken = 0;
785 
786  for (; token != nullptr; token = (TObjString *)nextToken()) {
787  switch (idxToken) {
788  case 1: // filter height
789  {
790  TString strFrmHeight(token->GetString());
791  filterHeight = strFrmHeight.Atoi();
792  } break;
793  case 2: // filter width
794  {
795  TString strFrmWidth(token->GetString());
796  filterWidth = strFrmWidth.Atoi();
797  } break;
798  case 3: // stride in rows
799  {
800  TString strStrideRows(token->GetString());
801  strideRows = strStrideRows.Atoi();
802  } break;
803  case 4: // stride in cols
804  {
805  TString strStrideCols(token->GetString());
806  strideCols = strStrideCols.Atoi();
807  } break;
808  }
809  ++idxToken;
810  }
811 
812  // Add the Max pooling layer
813  // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
814  deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
815 
816  // Add the same layer to fNet
817  if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
818 
819 
820  //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
821 
822  //// add the copy to all slave nets
823  //for (size_t i = 0; i < nets.size(); i++) {
824  // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
825  //}
826 }
827 
828 ////////////////////////////////////////////////////////////////////////////////
829 /// Pases the layer string and creates the appropriate reshape layer
830 template <typename Architecture_t, typename Layer_t>
832  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
833  TString delim)
834 {
835  int depth = 0;
836  int height = 0;
837  int width = 0;
838  bool flattening = false;
839 
840  // Split layer details
841  TObjArray *subStrings = layerString.Tokenize(delim);
842  TIter nextToken(subStrings);
843  TObjString *token = (TObjString *)nextToken();
844  int idxToken = 0;
845 
846  for (; token != nullptr; token = (TObjString *)nextToken()) {
847  if (token->GetString() == "FLAT") idxToken=4;
848  switch (idxToken) {
849  case 1: {
850  TString strDepth(token->GetString());
851  depth = strDepth.Atoi();
852  } break;
853  case 2: // height
854  {
855  TString strHeight(token->GetString());
856  height = strHeight.Atoi();
857  } break;
858  case 3: // width
859  {
860  TString strWidth(token->GetString());
861  width = strWidth.Atoi();
862  } break;
863  case 4: // flattening
864  {
865  TString flat(token->GetString());
866  if (flat == "FLAT") {
867  flattening = true;
868  }
869  } break;
870  }
871  ++idxToken;
872  }
873 
874  // Add the reshape layer
875  // TReshapeLayer<Architecture_t> *reshapeLayer =
876  deepNet.AddReshapeLayer(depth, height, width, flattening);
877 
878  // Add the same layer to fNet
879  if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
880 
881  //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
882 
883  //// add the copy to all slave nets
884  //for (size_t i = 0; i < nets.size(); i++) {
885  // nets[i].AddReshapeLayer(copyReshapeLayer);
886  //}
887 }
888 
889 ////////////////////////////////////////////////////////////////////////////////
890 /// Pases the layer string and creates the appropriate reshape layer
891 template <typename Architecture_t, typename Layer_t>
893  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
894  TString delim)
895 {
896 
897  // default values
898  double momentum = -1; //0.99;
899  double epsilon = 0.0001;
900 
901  // Split layer details
902  TObjArray *subStrings = layerString.Tokenize(delim);
903  TIter nextToken(subStrings);
904  TObjString *token = (TObjString *)nextToken();
905  int idxToken = 0;
906 
907  for (; token != nullptr; token = (TObjString *)nextToken()) {
908  switch (idxToken) {
909  case 1: {
910  momentum = std::atof(token->GetString().Data());
911  } break;
912  case 2: // height
913  {
914  epsilon = std::atof(token->GetString().Data());
915  } break;
916  }
917  ++idxToken;
918  }
919 
920  // Add the batch norm layer
921  //
922  auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
923  layer->Initialize();
924 
925  // Add the same layer to fNet
926  if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
927 
928 }
929 
930 ////////////////////////////////////////////////////////////////////////////////
931 /// Pases the layer string and creates the appropriate rnn layer
932 template <typename Architecture_t, typename Layer_t>
934  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
935  TString delim)
936 {
937  // int depth = 0;
938  int stateSize = 0;
939  int inputSize = 0;
940  int timeSteps = 0;
941  bool rememberState = false;
942  bool returnSequence = false;
943  bool resetGateAfter = false;
944 
945  // Split layer details
946  TObjArray *subStrings = layerString.Tokenize(delim);
947  TIter nextToken(subStrings);
948  TObjString *token = (TObjString *)nextToken();
949  int idxToken = 0;
950 
951  for (; token != nullptr; token = (TObjString *)nextToken()) {
952  switch (idxToken) {
953  case 1: // state size
954  {
955  TString strstateSize(token->GetString());
956  stateSize = strstateSize.Atoi();
957  break;
958  }
959  case 2: // input size
960  {
961  TString strinputSize(token->GetString());
962  inputSize = strinputSize.Atoi();
963  break;
964  }
965  case 3: // time steps
966  {
967  TString strtimeSteps(token->GetString());
968  timeSteps = strtimeSteps.Atoi();
969  break;
970  }
971  case 4: // returnSequence (option stateful in Keras)
972  {
973  TString strrememberState(token->GetString());
974  rememberState = (bool) strrememberState.Atoi();
975  break;
976  }
977  case 5: // return full output sequence (1 or 0)
978  {
979  TString str(token->GetString());
980  returnSequence = (bool)str.Atoi();
981  break;
982  }
983  case 6: // resetGate after option (only for GRU)
984  {
985  TString str(token->GetString());
986  resetGateAfter = (bool)str.Atoi();
987  }
988  }
989  ++idxToken;
990  }
991 
992  // Add the recurrent layer, initialize the weights and biases and copy
993  if (rnnType == kLayerRNN) {
994  auto * recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
995  recurrentLayer->Initialize();
996  // Add same layer to fNet
997  if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
998  }
999  else if (rnnType == kLayerLSTM ) {
1000  auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1001  recurrentLayer->Initialize();
1002  // Add same layer to fNet
1003  if (fBuildNet)
1004  fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1005  }
1006  else if (rnnType == kLayerGRU) {
1007  if (Architecture_t::IsCudnn()) resetGateAfter = true; // needed for Cudnn
1008  auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1009  recurrentLayer->Initialize();
1010  // Add same layer to fNet
1011  if (fBuildNet)
1012  fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1013  }
1014  else {
1015  Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
1016  }
1017 }
1018 
1019 ////////////////////////////////////////////////////////////////////////////////
1020 /// Standard constructor.
1021 MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
1022  : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1023  fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1024  fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1025  fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1026  fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1027  fXInput()
1028 {
1029  // Nothing to do here
1030 }
1031 
1032 ////////////////////////////////////////////////////////////////////////////////
1033 /// Constructor from a weight file.
1034 MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
1035  : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1036  fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1037  fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1038  fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1039  fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1040  fXInput()
1041 {
1042  // Nothing to do here
1043 }
1044 
1045 ////////////////////////////////////////////////////////////////////////////////
1046 /// Destructor.
1048 {
1049  // Nothing to do here
1050 }
1051 
1052 ////////////////////////////////////////////////////////////////////////////////
1053 /// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1054 auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1055 {
1056  // remove empty spaces
1057  parseString.ReplaceAll(" ","");
1058  KeyValueVector_t blockKeyValues;
1059  const TString keyValueDelim("=");
1060 
1061  TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1062  TIter nextBlock(blockStrings);
1063  TObjString *blockString = (TObjString *)nextBlock();
1064 
1065  for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1066  blockKeyValues.push_back(std::map<TString, TString>());
1067  std::map<TString, TString> &currentBlock = blockKeyValues.back();
1068 
1069  TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1070  TIter nextToken(subStrings);
1071  TObjString *token = (TObjString *)nextToken();
1072 
1073  for (; token != nullptr; token = (TObjString *)nextToken()) {
1074  TString strKeyValue(token->GetString());
1075  int delimPos = strKeyValue.First(keyValueDelim.Data());
1076  if (delimPos <= 0) continue;
1077 
1078  TString strKey = TString(strKeyValue(0, delimPos));
1079  strKey.ToUpper();
1080  TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1081 
1082  strKey.Strip(TString::kBoth, ' ');
1083  strValue.Strip(TString::kBoth, ' ');
1084 
1085  currentBlock.insert(std::make_pair(strKey, strValue));
1086  }
1087  }
1088  return blockKeyValues;
1089 }
1090 
1091 ////////////////////////////////////////////////////////////////////////////////
1092 /// What kind of analysis type can handle the CNN
1094 {
1095  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1096  if (type == Types::kMulticlass) return kTRUE;
1097  if (type == Types::kRegression) return kTRUE;
1098 
1099  return kFALSE;
1100 }
1101 
1102 ////////////////////////////////////////////////////////////////////////////////
1103 /// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1104 /// 100 etc.
1105 /// - 20% and 0.2 selects 20% of the training set as validation data.
1106 /// - 100 selects 100 events as the validation data.
1107 ///
1108 /// @return number of samples in validation set
1109 ///
1111 {
1112  Int_t nValidationSamples = 0;
1113  UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1114 
1115  // Parsing + Validation
1116  // --------------------
1117  if (fNumValidationString.EndsWith("%")) {
1118  // Relative spec. format 20%
1119  TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1120 
1121  if (intValStr.IsFloat()) {
1122  Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1123  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1124  } else {
1125  Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1126  << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1127  }
1128  } else if (fNumValidationString.IsFloat()) {
1129  Double_t valSizeAsDouble = fNumValidationString.Atof();
1130 
1131  if (valSizeAsDouble < 1.0) {
1132  // Relative spec. format 0.2
1133  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1134  } else {
1135  // Absolute spec format 100 or 100.0
1136  nValidationSamples = valSizeAsDouble;
1137  }
1138  } else {
1139  Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1140  << Endl;
1141  }
1142 
1143  // Value validation
1144  // ----------------
1145  if (nValidationSamples < 0) {
1146  Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1147  }
1148 
1149  if (nValidationSamples == 0) {
1150  Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1151  }
1152 
1153  if (nValidationSamples >= (Int_t)trainingSetSize) {
1154  Log() << kFATAL << "Validation size \"" << fNumValidationString
1155  << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1156  }
1157 
1158  return nValidationSamples;
1159 }
1160 
1161 
1162 ////////////////////////////////////////////////////////////////////////////////
1163 /// Implementation of architecture specific train method
1164 ///
1165 template <typename Architecture_t>
1167 {
1168 
1169  using Scalar_t = typename Architecture_t::Scalar_t;
1172  using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1173 
1174  bool debug = Log().GetMinType() == kDEBUG;
1175 
1176 
1177  // set the random seed for weight initialization
1178  Architecture_t::SetRandomSeed(fRandomSeed);
1179 
1180  ///split training data in training and validation data
1181  // and determine the number of training and testing examples
1182 
1183  size_t nValidationSamples = GetNumValidationSamples();
1184  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1185 
1186  const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1187  const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1188  const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1189 
1190  size_t trainingPhase = 1;
1191 
1192  for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1193 
1194  size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1195 
1196 
1197  // After the processing of the options, initialize the master deep net
1198  size_t batchSize = settings.batchSize;
1199  this->SetBatchSize(batchSize);
1200  // Should be replaced by actual implementation. No support for this now.
1201  size_t inputDepth = this->GetInputDepth();
1202  size_t inputHeight = this->GetInputHeight();
1203  size_t inputWidth = this->GetInputWidth();
1204  size_t batchDepth = this->GetBatchDepth();
1205  size_t batchHeight = this->GetBatchHeight();
1206  size_t batchWidth = this->GetBatchWidth();
1207  ELossFunction J = this->GetLossFunction();
1209  ERegularization R = settings.regularization;
1210  EOptimizer O = settings.optimizer;
1211  Scalar_t weightDecay = settings.weightDecay;
1212 
1213  //Batch size should be included in batch layout as well. There are two possibilities:
1214  // 1. Batch depth = batch size one will input tensorsa as (batch_size x d1 x d2)
1215  // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1216  // 2. Batch depth = 1, batch height = batch size batxch width = dim of input features
1217  // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1218 
1219  if (batchDepth != batchSize && batchDepth > 1) {
1220  Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1221  return;
1222  }
1223  if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1224  Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1225  return;
1226  }
1227 
1228 
1229  //check also that input layout compatible with batch layout
1230  bool badLayout = false;
1231  // case batch depth == batch size
1232  if (batchDepth == batchSize)
1233  badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1234  // case batch Height is batch size
1235  if (batchHeight == batchSize && batchDepth == 1)
1236  badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1237  if (badLayout) {
1238  Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1239  inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1240  return;
1241  }
1242 
1243  // check batch size is compatible with number of events
1244  if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1245  Log() << kFATAL << "Number of samples in the datasets are train: ("
1246  << nTrainingSamples << ") test: (" << nValidationSamples
1247  << "). One of these is smaller than the batch size of "
1248  << settings.batchSize << ". Please increase the batch"
1249  << " size to be at least the same size as the smallest"
1250  << " of them." << Endl;
1251  }
1252 
1253  DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1254 
1255  // create a copy of DeepNet for evaluating but with batch size = 1
1256  // fNet is the saved network and will be with CPU or Referrence architecture
1257  if (trainingPhase == 1) {
1258  fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1259  batchHeight, batchWidth, J, I, R, weightDecay));
1260  fBuildNet = true;
1261  }
1262  else
1263  fBuildNet = false;
1264 
1265  // Initialize the vector of slave nets
1266  std::vector<DeepNet_t> nets{};
1267  nets.reserve(nThreads);
1268  for (size_t i = 0; i < nThreads; i++) {
1269  // create a copies of the master deep net
1270  nets.push_back(deepNet);
1271  }
1272 
1273 
1274  // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1275  CreateDeepNet(deepNet, nets);
1276 
1277 
1278  // set droput probabilities
1279  // use convention to store in the layer 1.- dropout probabilities
1280  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1281  for (auto & p : dropoutVector) {
1282  p = 1.0 - p;
1283  }
1284  deepNet.SetDropoutProbabilities(dropoutVector);
1285 
1286  if (trainingPhase > 1) {
1287  // copy initial weights from fNet to deepnet
1288  for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1289  deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1290  }
1291  }
1292 
1293  // when fNet is built create also input matrix that will be used to evaluate it
1294  if (fBuildNet) {
1295  //int n1 = batchHeight;
1296  //int n2 = batchWidth;
1297  // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1298  //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1299  //fXInput = TensorImpl_t(1,n1,n2);
1301  if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1302  fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1303  fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );
1304 
1305 
1306  // create pointer to output matrix used for the predictions
1307  fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1308 
1309  // print the created network
1310  Log() << "***** Deep Learning Network *****" << Endl;
1311  if (Log().GetMinType() <= kINFO)
1312  deepNet.Print();
1313  }
1314  Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1315 
1316  // Loading the training and validation datasets
1317  TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1318  TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1319  {inputDepth, inputHeight, inputWidth},
1320  {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1321  deepNet.GetOutputWidth(), nThreads);
1322 
1323  TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1324  TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1325  {inputDepth, inputHeight, inputWidth},
1326  { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1327  deepNet.GetOutputWidth(), nThreads);
1328 
1329 
1330 
1331  // do an evaluation of the network to compute initial minimum test error
1332 
1333  Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1334 
1335  Double_t minValError = 0.0;
1336  Log() << "Compute initial loss on the validation data " << Endl;
1337  for (auto batch : validationData) {
1338  auto inputTensor = batch.GetInput();
1339  auto outputMatrix = batch.GetOutput();
1340  auto weights = batch.GetWeights();
1341 
1342  //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1343  // should we apply droput to the loss ??
1344  minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1345  }
1346  // add Regularization term
1347  Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1348  minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1349  minValError += regzTerm;
1350 
1351 
1352  // create a pointer to base class VOptimizer
1353  std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1354 
1355  // initialize the base class pointer with the corresponding derived class object.
1356  switch (O) {
1357 
1358  case EOptimizer::kSGD:
1359  optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1360  new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1361  break;
1362 
1363  case EOptimizer::kAdam: {
1364  optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1366  deepNet, settings.learningRate, settings.optimizerParams["ADAM_beta1"],
1367  settings.optimizerParams["ADAM_beta2"], settings.optimizerParams["ADAM_eps"]));
1368  break;
1369  }
1370 
1371  case EOptimizer::kAdagrad:
1372  optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1373  new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1374  settings.optimizerParams["ADAGRAD_eps"]));
1375  break;
1376 
1377  case EOptimizer::kRMSProp:
1378  optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1379  new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum,
1380  settings.optimizerParams["RMSPROP_rho"],
1381  settings.optimizerParams["RMSPROP_eps"]));
1382  break;
1383 
1384  case EOptimizer::kAdadelta:
1385  optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1386  new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1387  settings.optimizerParams["ADADELTA_rho"],
1388  settings.optimizerParams["ADADELTA_eps"]));
1389  break;
1390  }
1391 
1392 
1393  // Initialize the vector of batches, one batch for one slave network
1394  std::vector<TTensorBatch<Architecture_t>> batches{};
1395 
1396  bool converged = false;
1397  size_t convergenceCount = 0;
1398  size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1399 
1400  // start measuring
1401  std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1402  tstart = std::chrono::system_clock::now();
1403 
1404  // function building string with optimizer parameters values for logging
1405  auto optimParametersString = [&]() {
1406  TString optimParameters;
1407  for ( auto & element : settings.optimizerParams) {
1408  TString key = element.first;
1409  key.ReplaceAll(settings.optimizerName + "_", ""); // strip optimizerName_
1410  double value = element.second;
1411  if (!optimParameters.IsNull())
1412  optimParameters += ",";
1413  else
1414  optimParameters += " (";
1415  optimParameters += TString::Format("%s=%g", key.Data(), value);
1416  }
1417  if (!optimParameters.IsNull())
1418  optimParameters += ")";
1419  return optimParameters;
1420  };
1421 
1422  Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1423  << " Optimizer " << settings.optimizerName
1424  << optimParametersString()
1425  << " Learning rate = " << settings.learningRate << " regularization " << (char)settings.regularization
1426  << " minimum error = " << minValError << Endl;
1427  if (!fInteractive) {
1428  std::string separator(62, '-');
1429  Log() << separator << Endl;
1430  Log() << std::setw(10) << "Epoch"
1431  << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err." << std::setw(12)
1432  << "t(s)/epoch" << std::setw(12) << "t(s)/Loss" << std::setw(12) << "nEvents/s" << std::setw(12)
1433  << "Conv. Steps" << Endl;
1434  Log() << separator << Endl;
1435  }
1436 
1437  // set up generator for shuffling the batches
1438  // if seed is zero we have always a different order in the batches
1439  size_t shuffleSeed = 0;
1440  if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1441  RandomGenerator<TRandom3> rng(shuffleSeed);
1442 
1443  // print weights before
1444  if (fBuildNet && debug) {
1445  Log() << "Initial Deep Net Weights " << Endl;
1446  auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1447  for (size_t l = 0; l < weights_tensor.size(); ++l)
1448  weights_tensor[l].Print();
1449  auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1450  bias_tensor[0].Print();
1451  }
1452 
1453  Log() << " Start epoch iteration ..." << Endl;
1454  bool debugFirstEpoch = false;
1455  bool computeLossInTraining = true; // compute loss in training or at test time
1456  size_t nTrainEpochs = 0;
1457  while (!converged) {
1458  nTrainEpochs++;
1459  trainingData.Shuffle(rng);
1460 
1461  // execute all epochs
1462  //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1463 
1464  Double_t trainingError = 0;
1465  for (size_t i = 0; i < batchesInEpoch; ++i ) {
1466  // Clean and load new batches, one batch for one slave net
1467  //batches.clear();
1468  //batches.reserve(nThreads);
1469  //for (size_t j = 0; j < nThreads; j++) {
1470  // batches.push_back(trainingData.GetTensorBatch());
1471  //}
1472  if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1473 
1474  auto my_batch = trainingData.GetTensorBatch();
1475 
1476  if (debugFirstEpoch)
1477  std::cout << "got batch data - doing forward \n";
1478 
1479 #ifdef DEBUG
1480 
1481  Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1482  typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1483  typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1484  Architecture_t::PrintTensor(tOut,"label tensor",true) ;
1485  Architecture_t::PrintTensor(tW,"weight tensor",true) ;
1486 #endif
1487 
1488  deepNet.Forward(my_batch.GetInput(), true);
1489  // compute also loss
1490  if (computeLossInTraining) {
1491  auto outputMatrix = my_batch.GetOutput();
1492  auto weights = my_batch.GetWeights();
1493  trainingError += deepNet.Loss(outputMatrix, weights, false);
1494  }
1495 
1496  if (debugFirstEpoch)
1497  std::cout << "- doing backward \n";
1498 
1499 #ifdef DEBUG
1500  size_t nlayers = deepNet.GetLayers().size();
1501  for (size_t l = 0; l < nlayers; ++l) {
1502  if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1503  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1504  TString::Format("initial weights layer %d", l).Data());
1505 
1506  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1507  TString::Format("output tensor layer %d", l).Data());
1508  }
1509 #endif
1510 
1511  //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1512 
1513  deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1514 
1515  if (debugFirstEpoch)
1516  std::cout << "- doing optimizer update \n";
1517 
1518  // increment optimizer step that is used in some algorithms (e.g. ADAM)
1519  optimizer->IncrementGlobalStep();
1520  optimizer->Step();
1521 
1522 #ifdef DEBUG
1523  std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1524  for (size_t l = 0; l < nlayers; ++l) {
1525  if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1526  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1527  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1528  }
1529  }
1530 #endif
1531 
1532  }
1533 
1534  if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
1535  //}
1536  debugFirstEpoch = false;
1537  if ((nTrainEpochs % settings.testInterval) == 0) {
1538 
1539  std::chrono::time_point<std::chrono::system_clock> t1,t2;
1540 
1541  t1 = std::chrono::system_clock::now();
1542 
1543  // Compute validation error.
1544 
1545 
1546  Double_t valError = 0.0;
1547  bool inTraining = false;
1548  for (auto batch : validationData) {
1549  auto inputTensor = batch.GetInput();
1550  auto outputMatrix = batch.GetOutput();
1551  auto weights = batch.GetWeights();
1552  // should we apply droput to the loss ??
1553  valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1554  }
1555  // normalize loss to number of batches and add regularization term
1556  Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1557  valError /= (Double_t)(nValidationSamples / settings.batchSize);
1558  valError += regTerm;
1559 
1560  //Log the loss value
1561  fTrainHistory.AddValue("valError",nTrainEpochs,valError);
1562 
1563  t2 = std::chrono::system_clock::now();
1564 
1565  // checking for convergence
1566  if (valError < minValError) {
1567  convergenceCount = 0;
1568  } else {
1569  convergenceCount += settings.testInterval;
1570  }
1571 
1572  // copy configuration when reached a minimum error
1573  if (valError < minValError ) {
1574  // Copy weights from deepNet to fNet
1575  Log() << std::setw(10) << nTrainEpochs
1576  << " Minimum Test error found - save the configuration " << Endl;
1577  for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1578  fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1579  // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1580  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1581  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1582  // }
1583  }
1584  // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1585  // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1586 
1587  minValError = valError;
1588  }
1589  else if ( minValError <= 0. )
1590  minValError = valError;
1591 
1592  if (!computeLossInTraining) {
1593  trainingError = 0.0;
1594  // Compute training error.
1595  for (auto batch : trainingData) {
1596  auto inputTensor = batch.GetInput();
1597  auto outputMatrix = batch.GetOutput();
1598  auto weights = batch.GetWeights();
1599  trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1600  }
1601  }
1602  // normalize loss to number of batches and add regularization term
1603  trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1604  trainingError += regTerm;
1605 
1606  //Log the loss value
1607  fTrainHistory.AddValue("trainingError",nTrainEpochs,trainingError);
1608 
1609  // stop measuring
1610  tend = std::chrono::system_clock::now();
1611 
1612  // Compute numerical throughput.
1613  std::chrono::duration<double> elapsed_seconds = tend - tstart;
1614  std::chrono::duration<double> elapsed1 = t1-tstart;
1615  // std::chrono::duration<double> elapsed2 = t2-tstart;
1616  // time to compute training and test errors
1617  std::chrono::duration<double> elapsed_testing = tend-t1;
1618 
1619  double seconds = elapsed_seconds.count();
1620  // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1621  // nGFlops *= deepnet.GetNFlops() * 1e-9;
1622  double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1623 
1624  converged =
1625  convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1626 
1627 
1628  Log() << std::setw(10) << nTrainEpochs << " | "
1629  << std::setw(12) << trainingError
1630  << std::setw(12) << valError
1631  << std::setw(12) << seconds / settings.testInterval
1632  << std::setw(12) << elapsed_testing.count()
1633  << std::setw(12) << 1. / eventTime
1634  << std::setw(12) << convergenceCount
1635  << Endl;
1636 
1637  if (converged) {
1638  Log() << Endl;
1639  }
1640  tstart = std::chrono::system_clock::now();
1641  }
1642 
1643  // if (stepCount % 10 == 0 || converged) {
1644  if (converged && debug) {
1645  Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
1646  << Endl;
1647  auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1648  auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1649  for (size_t l = 0; l < weights_tensor.size(); ++l)
1650  weights_tensor[l].Print();
1651  bias_tensor[0].Print();
1652  }
1653 
1654  }
1655 
1656  trainingPhase++;
1657  } // end loop on training Phase
1658 }
1659 
1660 ////////////////////////////////////////////////////////////////////////////////
1662 {
1663  if (fInteractive) {
1664  Log() << kFATAL << "Not implemented yet" << Endl;
1665  return;
1666  }
1667 
1668  // using for training same scalar type defined for the prediction
1669  if (this->GetArchitectureString() == "GPU") {
1670 #ifdef R__HAS_TMVAGPU
1671  Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1672 #ifdef R__HAS_CUDNN
1673  TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1674 #else
1675  TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1676 #endif
1677 #else
1678  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1679  "you have CUDA installed and it was successfully "
1680  "detected by CMAKE."
1681  << Endl;
1682  return;
1683 #endif
1684  } else if (this->GetArchitectureString() == "CPU") {
1685 #ifdef R__HAS_TMVACPU
1686  // note that number of threads used for BLAS might be different
1687  // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1688  Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
1689  << gConfig().GetNCpu() << Endl << Endl;
1690 #else
1691  Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
1692  << Endl;
1693 #endif
1694  TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1695  return;
1696  }
1697  else {
1698  Log() << kFATAL << this->GetArchitectureString() <<
1699  " is not a supported architecture for TMVA::MethodDL"
1700  << Endl;
1701  }
1702 
1703 }
1704 
1705 
1706 ////////////////////////////////////////////////////////////////////////////////
1707 Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1708 {
1709 
1710  // note that fNet should have been build with a batch size of 1
1711 
1712  if (!fNet || fNet->GetDepth() == 0) {
1713  Log() << kFATAL << "The network has not been trained and fNet is not built"
1714  << Endl;
1715  }
1716 
1717  // input size must be equal to 1 which is the batch size of fNet
1718  R__ASSERT(fNet->GetBatchSize() == 1);
1719 
1720  // int batchWidth = fNet->GetBatchWidth();
1721  // int batchDepth = fNet->GetBatchDepth();
1722  // int batchHeight = fNet->GetBatchHeight();
1723 // int noutput = fNet->GetOutputWidth();
1724 
1725 
1726  // get current event
1727  const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1728 
1729  size_t nVariables = GetEvent()->GetNVariables();
1730 
1731  // for Columnlayout tensor memory layout is HWC while for rowwise is CHW
1733  R__ASSERT(fXInput.GetShape().size() < 4);
1734  size_t nc, nhw = 0;
1735  if (fXInput.GetShape().size() == 2) {
1736  nc = fXInput.GetShape()[0];
1737  if (nc != 1 ) {
1739  Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1"
1740  << Endl;
1741  }
1742  nhw = fXInput.GetShape()[1];
1743  } else {
1744  nc = fXInput.GetCSize();
1745  nhw = fXInput.GetWSize();
1746  }
1747  if ( nVariables != nc * nhw) {
1748  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1749  << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw
1750  << Endl;
1751  }
1752  for (size_t j = 0; j < nc; j++) {
1753  for (size_t k = 0; k < nhw; k++) {
1754  // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
1755  fXInputBuffer[ k * nc + j] = inputValues[j*nhw + k]; // for column layout !!!
1756  }
1757  }
1758  } else {
1759  // row-wise layout
1760  assert(fXInput.GetShape().size() >= 4);
1761  size_t nc = fXInput.GetCSize();
1762  size_t nh = fXInput.GetHSize();
1763  size_t nw = fXInput.GetWSize();
1764  size_t n = nc * nh * nw;
1765  if ( nVariables != n) {
1766  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1767  << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
1768  << Endl;
1769  }
1770  for (size_t j = 0; j < n; j++) {
1771  // in this case TMVA event has same order as input tensor
1772  fXInputBuffer[ j ] = inputValues[j]; // for column layout !!!
1773  }
1774  }
1775  // copy buffer in input
1776  fXInput.GetDeviceBuffer().CopyFrom( fXInputBuffer);
1777 
1778  // perform the prediction
1779  fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1780 
1781  // return value
1782  double mvaValue = (*fYHat)(0, 0);
1783 
1784  // for debugging
1785 #ifdef DEBUG_MVAVALUE
1786  using Tensor_t = std::vector<MatrixImpl_t>;
1787  TMatrixF xInput(n1,n2, inputValues.data() );
1788  std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1789  xInput.Print();
1790  std::cout << "Output of DeepNet " << mvaValue << std::endl;
1791  auto & deepnet = *fNet;
1792  std::cout << "Loop on layers " << std::endl;
1793  for (int l = 0; l < deepnet.GetDepth(); ++l) {
1794  std::cout << "Layer " << l;
1795  const auto * layer = deepnet.GetLayerAt(l);
1796  const Tensor_t & layer_output = layer->GetOutput();
1797  layer->Print();
1798  std::cout << "DNN output " << layer_output.size() << std::endl;
1799  for (size_t i = 0; i < layer_output.size(); ++i) {
1800 #ifdef R__HAS_TMVAGPU
1801  //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1802  TMatrixD m = layer_output[i];
1803 #else
1804  TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1805 #endif
1806  m.Print();
1807  }
1808  const Tensor_t & layer_weights = layer->GetWeights();
1809  std::cout << "DNN weights " << layer_weights.size() << std::endl;
1810  if (layer_weights.size() > 0) {
1811  int i = 0;
1812 #ifdef R__HAS_TMVAGPU
1813  TMatrixD m = layer_weights[i];
1814 // TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1815 #else
1816  TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1817 #endif
1818  m.Print();
1819  }
1820  }
1821 #endif
1822 
1823  return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1824 }
1825 ////////////////////////////////////////////////////////////////////////////////
1826 /// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1827 ////////////////////////////////////////////////////////////////////////////////
1828 template <typename Architecture_t>
1829 std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1830 {
1831 
1832  // Check whether the model is setup
1833  if (!fNet || fNet->GetDepth() == 0) {
1834  Log() << kFATAL << "The network has not been trained and fNet is not built"
1835  << Endl;
1836  }
1837 
1838  // rebuild the networks
1839  this->SetBatchSize(batchSize);
1840  size_t inputDepth = this->GetInputDepth();
1841  size_t inputHeight = this->GetInputHeight();
1842  size_t inputWidth = this->GetInputWidth();
1843  size_t batchDepth = this->GetBatchDepth();
1844  size_t batchHeight = this->GetBatchHeight();
1845  size_t batchWidth = this->GetBatchWidth();
1846  ELossFunction J = fNet->GetLossFunction();
1847  EInitialization I = fNet->GetInitialization();
1848  ERegularization R = fNet->GetRegularization();
1849  Double_t weightDecay = fNet->GetWeightDecay();
1850 
1851  using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1852  using Matrix_t = typename Architecture_t::Matrix_t;
1853  using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1854 
1855  // create the deep neural network
1856  DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1857  std::vector<DeepNet_t> nets{};
1858  fBuildNet = false;
1859  CreateDeepNet(deepNet,nets);
1860 
1861  // copy weights from the saved fNet to the built DeepNet
1862  for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1863  deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1864  // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1865  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
1866  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
1867  // }
1868  }
1869 
1870  size_t n1 = deepNet.GetBatchHeight();
1871  size_t n2 = deepNet.GetBatchWidth();
1872  size_t n0 = deepNet.GetBatchSize();
1873  // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1874  if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1875  n1 = deepNet.GetBatchSize();
1876  n0 = 1;
1877  }
1878  //this->SetBatchDepth(n0);
1879  Long64_t nEvents = lastEvt - firstEvt;
1880  TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1881  TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1882 
1883 
1884  // Tensor_t xInput;
1885  // for (size_t i = 0; i < n0; ++i)
1886  // xInput.emplace_back(Matrix_t(n1,n2));
1887 
1888  // create pointer to output matrix used for the predictions
1889  Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1890 
1891  // use timer
1892  Timer timer( nEvents, GetName(), kTRUE );
1893 
1894  if (logProgress)
1895  Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1896  << "Evaluation of " << GetMethodName() << " on "
1897  << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1898  << " sample (" << nEvents << " events)" << Endl;
1899 
1900 
1901  // eventg loop
1902  std::vector<double> mvaValues(nEvents);
1903 
1904 
1905  for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1906 
1907  Long64_t ievt_end = ievt + batchSize;
1908  // case of batch prediction for
1909  if (ievt_end <= lastEvt) {
1910 
1911  if (ievt == firstEvt) {
1912  Data()->SetCurrentEvent(ievt);
1913  size_t nVariables = GetEvent()->GetNVariables();
1914 
1915  if (n1 == batchSize && n0 == 1) {
1916  if (n2 != nVariables) {
1917  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1918  << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1919  << Endl;
1920  }
1921  } else {
1922  if (n1*n2 != nVariables || n0 != batchSize) {
1923  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1924  << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1925  << Endl;
1926  }
1927  }
1928  }
1929 
1930  auto batch = testData.GetTensorBatch();
1931  auto inputTensor = batch.GetInput();
1932 
1933  auto xInput = batch.GetInput();
1934  // make the prediction
1935  deepNet.Prediction(yHat, xInput, fOutputFunction);
1936  for (size_t i = 0; i < batchSize; ++i) {
1937  double value = yHat(i,0);
1938  mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1939  }
1940  }
1941  else {
1942  // case of remaining events: compute prediction by single event !
1943  for (Long64_t i = ievt; i < lastEvt; ++i) {
1944  Data()->SetCurrentEvent(i);
1945  mvaValues[i] = GetMvaValue();
1946  }
1947  }
1948  }
1949 
1950  if (logProgress) {
1951  Log() << kINFO
1952  << "Elapsed time for evaluation of " << nEvents << " events: "
1953  << timer.GetElapsedTime() << " " << Endl;
1954  }
1955 
1956  return mvaValues;
1957 }
1958 
1959 const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1960 {
1961  size_t nVariables = GetEvent()->GetNVariables();
1962  MatrixImpl_t X(1, nVariables);
1963  TensorImpl_t X_vec ( 1, 1, nVariables); // needs to be really 1
1964  const Event *ev = GetEvent();
1965  const std::vector<Float_t>& inputValues = ev->GetValues();
1966  for (size_t i = 0; i < nVariables; i++) {
1967  X_vec(0,i,0) = inputValues[i]; // in case of column format !!
1968  }
1969  //X_vec.emplace_back(X);
1970 
1971  size_t nTargets = std::max(1u, ev->GetNTargets());
1972  MatrixImpl_t YHat(1, nTargets);
1973  std::vector<Float_t> output(nTargets);
1974  fNet->Prediction(YHat, X_vec, fOutputFunction);
1975 
1976  for (size_t i = 0; i < nTargets; i++)
1977  output[i] = YHat(0, i);
1978 
1979  if (fRegressionReturnVal == NULL) {
1980  fRegressionReturnVal = new std::vector<Float_t>();
1981  }
1982  fRegressionReturnVal->clear();
1983 
1984  Event * evT = new Event(*ev);
1985  for (size_t i = 0; i < nTargets; ++i) {
1986  evT->SetTarget(i, output[i]);
1987  }
1988 
1989  const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1990  for (size_t i = 0; i < nTargets; ++i) {
1991  fRegressionReturnVal->push_back(evT2->GetTarget(i));
1992  }
1993  delete evT;
1994  return *fRegressionReturnVal;
1995 }
1996 
1997 const std::vector<Float_t> & TMVA::MethodDL::GetMulticlassValues()
1998 {
1999  size_t nVariables = GetEvent()->GetNVariables();
2000  MatrixImpl_t X(1, nVariables);
2001  TensorImpl_t X_vec ( 1, 1, nVariables);
2002  MatrixImpl_t YHat(1, DataInfo().GetNClasses());
2003  if (fMulticlassReturnVal == NULL) {
2004  fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
2005  }
2006 
2007  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
2008  for (size_t i = 0; i < nVariables; i++) {
2009  X_vec(0,i, 0) = inputValues[i];
2010  }
2011  //X_vec.emplace_back(X);
2012  fNet->Prediction(YHat, X_vec, fOutputFunction);
2013  for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
2014  (*fMulticlassReturnVal)[i] = YHat(0, i);
2015  }
2016  return *fMulticlassReturnVal;
2017 }
2018 
2019 
2020 ////////////////////////////////////////////////////////////////////////////////
2021 /// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
2022 ////////////////////////////////////////////////////////////////////////////////
2023 std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
2024 {
2025  // Long64_t nEvents = Data()->GetNEvents();
2026  // std::vector<Double_t> v(nEvents);
2027  // for (Long64_t i = 0; i < nEvents; ++i) {
2028  // Data()->SetCurrentEvent(i);
2029  // v[i] = GetMvaValue();
2030  // }
2031  // return v;
2032 
2033 
2034  Long64_t nEvents = Data()->GetNEvents();
2035  if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2036  if (firstEvt < 0) firstEvt = 0;
2037  nEvents = lastEvt-firstEvt;
2038 
2039  // use same batch size as for training (from first strategy)
2040  size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
2041  size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
2042  if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
2043 
2044  // using for training same scalar type defined for the prediction
2045  if (this->GetArchitectureString() == "GPU") {
2046 #ifdef R__HAS_TMVAGPU
2047  Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
2048 #ifdef R__HAS_CUDNN
2049  return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2050 #else
2051  return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2052 #endif
2053 
2054 #endif
2055  }
2056  Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
2057  return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
2058 }
2059 ////////////////////////////////////////////////////////////////////////////////
2060 void MethodDL::AddWeightsXMLTo(void * parent) const
2061 {
2062  // Create the parent XML node with name "Weights"
2063  auto & xmlEngine = gTools().xmlengine();
2064  void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2065 
2066  /*! Get all necessary information, in order to be able to reconstruct the net
2067  * if we read the same XML file. */
2068 
2069  // Deep Net specific info
2070  Int_t depth = fNet->GetDepth();
2071 
2072  Int_t inputDepth = fNet->GetInputDepth();
2073  Int_t inputHeight = fNet->GetInputHeight();
2074  Int_t inputWidth = fNet->GetInputWidth();
2075 
2076  Int_t batchSize = fNet->GetBatchSize();
2077 
2078  Int_t batchDepth = fNet->GetBatchDepth();
2079  Int_t batchHeight = fNet->GetBatchHeight();
2080  Int_t batchWidth = fNet->GetBatchWidth();
2081 
2082  char lossFunction = static_cast<char>(fNet->GetLossFunction());
2083  char initialization = static_cast<char>(fNet->GetInitialization());
2084  char regularization = static_cast<char>(fNet->GetRegularization());
2085 
2086  Double_t weightDecay = fNet->GetWeightDecay();
2087 
2088  // Method specific info (not sure these are needed)
2089  char outputFunction = static_cast<char>(this->GetOutputFunction());
2090  //char lossFunction = static_cast<char>(this->GetLossFunction());
2091 
2092  // Add attributes to the parent node
2093  xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2094 
2095  xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2096  xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2097  xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2098 
2099  xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2100  xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2101  xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2102  xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2103 
2104  xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2105  xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2106  xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2107  xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2108 
2109  gTools().AddAttr(nn, "WeightDecay", weightDecay);
2110 
2111 
2112  for (Int_t i = 0; i < depth; i++)
2113  {
2114  fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2115  }
2116 
2117 
2118 }
2119 
2120 ////////////////////////////////////////////////////////////////////////////////
2121 void MethodDL::ReadWeightsFromXML(void * rootXML)
2122 {
2123 
2124  auto netXML = gTools().GetChild(rootXML, "Weights");
2125  if (!netXML){
2126  netXML = rootXML;
2127  }
2128 
2129  size_t netDepth;
2130  gTools().ReadAttr(netXML, "NetDepth", netDepth);
2131 
2132  size_t inputDepth, inputHeight, inputWidth;
2133  gTools().ReadAttr(netXML, "InputDepth", inputDepth);
2134  gTools().ReadAttr(netXML, "InputHeight", inputHeight);
2135  gTools().ReadAttr(netXML, "InputWidth", inputWidth);
2136 
2137  size_t batchSize, batchDepth, batchHeight, batchWidth;
2138  gTools().ReadAttr(netXML, "BatchSize", batchSize);
2139  // use always batchsize = 1
2140  //batchSize = 1;
2141  gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
2142  gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
2143  gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
2144 
2145  char lossFunctionChar;
2146  gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
2147  char initializationChar;
2148  gTools().ReadAttr(netXML, "Initialization", initializationChar);
2149  char regularizationChar;
2150  gTools().ReadAttr(netXML, "Regularization", regularizationChar);
2151  char outputFunctionChar;
2152  gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
2153  double weightDecay;
2154  gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
2155 
2156  // create the net
2157 
2158  // DeepNetCpu_t is defined in MethodDL.h
2159  this->SetInputDepth(inputDepth);
2160  this->SetInputHeight(inputHeight);
2161  this->SetInputWidth(inputWidth);
2162  this->SetBatchDepth(batchDepth);
2163  this->SetBatchHeight(batchHeight);
2164  this->SetBatchWidth(batchWidth);
2165 
2166 
2167 
2168  fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2169  batchHeight, batchWidth,
2170  static_cast<ELossFunction>(lossFunctionChar),
2171  static_cast<EInitialization>(initializationChar),
2172  static_cast<ERegularization>(regularizationChar),
2173  weightDecay));
2174 
2175  fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
2176 
2177 
2178  //size_t previousWidth = inputWidth;
2179  auto layerXML = gTools().xmlengine().GetChild(netXML);
2180 
2181  // loop on the layer and add them to the network
2182  for (size_t i = 0; i < netDepth; i++) {
2183 
2184  TString layerName = gTools().xmlengine().GetNodeName(layerXML);
2185 
2186  // case of dense layer
2187  if (layerName == "DenseLayer") {
2188 
2189  // read width and activation function and then we can create the layer
2190  size_t width = 0;
2191  gTools().ReadAttr(layerXML, "Width", width);
2192 
2193  // Read activation function.
2194  TString funcString;
2195  gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2196  EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
2197 
2198 
2199  fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
2200 
2201  }
2202  // Convolutional Layer
2203  else if (layerName == "ConvLayer") {
2204 
2205  // read width and activation function and then we can create the layer
2206  size_t depth = 0;
2207  gTools().ReadAttr(layerXML, "Depth", depth);
2208  size_t fltHeight, fltWidth = 0;
2209  size_t strideRows, strideCols = 0;
2210  size_t padHeight, padWidth = 0;
2211  gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
2212  gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
2213  gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2214  gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2215  gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
2216  gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
2217 
2218  // Read activation function.
2219  TString funcString;
2220  gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2221  EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
2222 
2223 
2224  fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2225  padHeight, padWidth, actFunction);
2226 
2227  }
2228 
2229  // MaxPool Layer
2230  else if (layerName == "MaxPoolLayer") {
2231 
2232  // read maxpool layer info
2233  size_t filterHeight, filterWidth = 0;
2234  size_t strideRows, strideCols = 0;
2235  gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
2236  gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2237  gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2238  gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2239 
2240  fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2241  }
2242  // Reshape Layer
2243  else if (layerName == "ReshapeLayer") {
2244 
2245  // read reshape layer info
2246  size_t depth, height, width = 0;
2247  gTools().ReadAttr(layerXML, "Depth", depth);
2248  gTools().ReadAttr(layerXML, "Height", height);
2249  gTools().ReadAttr(layerXML, "Width", width);
2250  int flattening = 0;
2251  gTools().ReadAttr(layerXML, "Flattening",flattening );
2252 
2253  fNet->AddReshapeLayer(depth, height, width, flattening);
2254 
2255  }
2256  // RNN Layer
2257  else if (layerName == "RNNLayer") {
2258 
2259  // read RNN layer info
2260  size_t stateSize,inputSize, timeSteps = 0;
2261  int rememberState= 0;
2262  int returnSequence = 0;
2263  gTools().ReadAttr(layerXML, "StateSize", stateSize);
2264  gTools().ReadAttr(layerXML, "InputSize", inputSize);
2265  gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2266  gTools().ReadAttr(layerXML, "RememberState", rememberState );
2267  gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2268 
2269  fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2270 
2271  }
2272  // LSTM Layer
2273  else if (layerName == "LSTMLayer") {
2274 
2275  // read RNN layer info
2276  size_t stateSize,inputSize, timeSteps = 0;
2277  int rememberState, returnSequence = 0;
2278  gTools().ReadAttr(layerXML, "StateSize", stateSize);
2279  gTools().ReadAttr(layerXML, "InputSize", inputSize);
2280  gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2281  gTools().ReadAttr(layerXML, "RememberState", rememberState );
2282  gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2283 
2284  fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2285 
2286  }
2287  // GRU Layer
2288  else if (layerName == "GRULayer") {
2289 
2290  // read RNN layer info
2291  size_t stateSize,inputSize, timeSteps = 0;
2292  int rememberState, returnSequence, resetGateAfter = 0;
2293  gTools().ReadAttr(layerXML, "StateSize", stateSize);
2294  gTools().ReadAttr(layerXML, "InputSize", inputSize);
2295  gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2296  gTools().ReadAttr(layerXML, "RememberState", rememberState );
2297  gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2298  gTools().ReadAttr(layerXML, "ResetGateAfter", resetGateAfter);
2299 
2300  if (!resetGateAfter && ArchitectureImpl_t::IsCudnn())
2301  Warning("ReadWeightsFromXML",
2302  "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2303 
2304  fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2305  }
2306  // BatchNorm Layer
2307  else if (layerName == "BatchNormLayer") {
2308  // use some dammy value which will be overwrittem in BatchNormLayer::ReadWeightsFromXML
2309  fNet->AddBatchNormLayer(0., 0.0);
2310  }
2311  // read weights and biases
2312  fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2313 
2314  // read next layer
2315  layerXML = gTools().GetNextChild(layerXML);
2316  }
2317 
2318  fBuildNet = false;
2319  // create now the input and output matrices
2320  //int n1 = batchHeight;
2321  //int n2 = batchWidth;
2322  // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2323  //if (fXInput.size() > 0) fXInput.clear();
2324  //fXInput.emplace_back(MatrixImpl_t(n1,n2));
2326  if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
2327  // make here a ColumnMajor tensor
2329  fXInputBuffer = HostBufferImpl_t( fXInput.GetSize());
2330 
2331  // create pointer to output matrix used for the predictions
2332  fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2333 
2334 
2335 }
2336 
2337 
////////////////////////////////////////////////////////////////////////////////
/// Reading weights from a plain-text stream is not supported for MethodDL;
/// weights are restored via ReadWeightsFromXML instead. Intentionally empty.
void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
{
}
2342 
2343 ////////////////////////////////////////////////////////////////////////////////
2345 {
2346  // TODO
2347  return NULL;
2348 }
2349 
2350 ////////////////////////////////////////////////////////////////////////////////
2352 {
2353  // TODO
2354 }
2355 
2356 } // namespace TMVA
TMVA::Experimental::MemoryLayout::ColumnMajor
@ ColumnMajor
TMVA::DNN::TDeepNet::AddReshapeLayer
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:773
TMVA::MethodDL::Train
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1661
make_cnn_model.optimizer
optimizer
Definition: make_cnn_model.py:15
l
auto * l
Definition: textangle.C:4
TMVA::DNN::EInitialization::kUniform
@ kUniform
Adadelta.h
ROOT::TMetaUtils::propNames::separator
static const std::string separator("@@@")
m
auto * m
Definition: textangle.C:8
Adam.h
n
const Int_t n
Definition: legend1.C:16
TMVA::MethodDL::TrainDeepNet
void TrainDeepNet()
train of deep neural network using the defined architecture
Definition: MethodDL.cxx:1166
TMVA::MethodDL::fBuildNet
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:197
TMVA::MethodDL::ParseMaxPoolLayer
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:770
TMVA::DNN::TDeepNet::AddConvLayer
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:439
TMVA::MethodDL::fYHat
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:204
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:100
TMVA::Configurable::Log
MsgLogger & Log() const
Definition: Configurable.h:122
TMVA::DataSet::GetCurrentType
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:194
TMVA::Types::kMulticlass
@ kMulticlass
Definition: Types.h:131
TMVA::MethodDL::fOutputFunction
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:185
TMVA::TTrainingSettings::optimizerName
TString optimizerName
Definition: MethodDL.h:79
TMVA::DNN::EOptimizer
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:82
e
#define e(i)
Definition: RSha256.hxx:103
TMVA::DNN::EInitialization::kGlorotUniform
@ kGlorotUniform
TMVA::DNN::TDeepNet::AddBatchNormLayer
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition: DeepNet.h:825
TMVA::Tools::GetChild
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TMVA::MethodBase::Data
DataSet * Data() const
Definition: MethodBase.h:409
TMVA::DNN::EOutputFunction::kSigmoid
@ kSigmoid
TObjArray
An array of TObjects.
Definition: TObjArray.h:37
TMVA::MethodDL::AddWeightsXMLTo
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:2060
TMVA::MethodDL::GetHelpMessage
void GetHelpMessage() const
Definition: MethodDL.cxx:2351
TMVA::MethodDL::GetArchitectureString
TString GetArchitectureString() const
Definition: MethodDL.h:274
TMVA::kERROR
@ kERROR
Definition: Types.h:62
TMVA::DNN::EOutputFunction
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
TXMLEngine::NewChild
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:715
TString::Atoi
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1943
TMVA::DNN
Definition: Adadelta.h:36
TFormula
The Formula class.
Definition: TFormula.h:87
TString::Strip
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1128
TMVA::Types::kRegression
@ kRegression
Definition: Types.h:130
TObject::Print
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:552
TMVA::DNN::EInitialization::kZero
@ kZero
TMVA::MethodDL::HostBufferImpl_t
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition: MethodDL.h:110
TMVA::DNN::TDeepNet::AddDenseLayer
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:740
TMVA::DNN::ERegularization
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
TString::Data
const char * Data() const
Definition: TString.h:369
TMVA::MethodDL::GetMulticlassValues
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDL.cxx:1997
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
Form
char * Form(const char *fmt,...)
TMVA::Ranking
Ranking for variables in method (implementation)
Definition: Ranking.h:48
TMVA::DNN::EInitialization::kIdentity
@ kIdentity
TObjString.h
TMVA::Event::GetClass
UInt_t GetClass() const
Definition: Event.h:86
TMVA::DNN::regularization
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:238
TMVA::DNN::ERegularization::kL1
@ kL1
TMVA::TTrainingSettings::maxEpochs
size_t maxEpochs
Definition: MethodDL.h:76
IMethod.h
TMVA::DNN::EOptimizer::kAdam
@ kAdam
TMVA::MethodDL::CreateDeepNet
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets)
After calling the ProcesOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:529
Long64_t
long long Long64_t
Definition: RtypesCore.h:80
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:760
TObject::Error
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:893
output
static void output(int code)
Definition: gifencode.c:226
TMVA::MethodDL::GetInputHeight
size_t GetInputHeight() const
Definition: MethodDL.h:252
TMVA::MethodDL::DeclareOptions
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:167
TMVA::RandomGenerator
Definition: Tools.h:305
TString::Atof
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:2009
TMVA::MethodDL::CreateRanking
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2344
TMVA::MethodDL::ParseReshapeLayer
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:831
TMVA::MethodDL::GetBatchWidth
size_t GetBatchWidth() const
Definition: MethodDL.h:260
TMVA::MethodDL::PredictDeepNet
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1829
width
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
TMVA::MethodDL::~MethodDL
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:1047
TMVA::DNN::ELossFunction::kMeanSquaredError
@ kMeanSquaredError
TMVA::TTrainingSettings::learningRate
Double_t learningRate
Definition: MethodDL.h:80
TMVA::DNN::TMVAInput_t
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
TObjString::GetString
const TString & GetString() const
Definition: TObjString.h:46
TMVA::DNN::TDeepNet::AddBasicRNNLayer
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:524
TMVA::MethodDL::ParseRecurrentLayer
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate rnn layer.
Definition: MethodDL.cxx:933
TMVA::DNN::CNN::TConvLayer
Definition: ConvLayer.h:75
TMVA::DNN::ELossFunction::kCrossEntropy
@ kCrossEntropy
TString::Length
Ssiz_t Length() const
Definition: TString.h:410
TMVA::MethodDL::SetInputHeight
void SetInputHeight(int inputHeight)
Definition: MethodDL.h:283
TMVA::DNN::ERegularization::kNone
@ kNone
TMVA::MethodDL::SetBatchHeight
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:289
TXMLEngine::GetChild
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
Definition: TXMLEngine.cxx:1146
TMVA::DNN::EInitialization::kGlorotNormal
@ kGlorotNormal
TMVA::DNN::EActivationFunction::kSoftSign
@ kSoftSign
TMVA::DNN::TDenseLayer
Generic layer class.
Definition: DenseLayer.h:59
TMVA::MethodDL::DeepNetImpl_t
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition: MethodDL.h:106
TMVA::MsgLogger::GetMinType
EMsgType GetMinType() const
Definition: MsgLogger.h:71
TMVA::Event::GetTarget
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
TMVA::Event::SetTarget
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:359
TMVA::DNN::TCpu::PrintTensor
static void PrintTensor(const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false)
Definition: Cpu.h:862
TMVA::DNN::EOptimizer::kSGD
@ kSGD
TMVA::DNN::EActivationFunction::kGauss
@ kGauss
TString::IsFloat
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1813
TMVA::MethodDL::fTrainingSettings
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:200
TString::Format
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2333
TMVA::MethodDL::Init
void Init()
default initializations
Definition: MethodDL.cxx:432
TMVA::MethodDL::SetInputDepth
void SetInputDepth(int inputDepth)
Setters.
Definition: MethodDL.h:282
TMVA::DNN::EActivationFunction::kTanh
@ kTanh
TMVA::DNN::weightDecay
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
TMVA::DNN::TTensorDataLoader
TTensorDataLoader.
Definition: TensorDataLoader.h:133
TString
Basic string class.
Definition: TString.h:136
TMatrixT
TMatrixT.
Definition: TMatrixT.h:39
TMVA::MethodDL::GetMvaValue
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDL.cxx:1707
TMVA::MethodDL::HasAnalysisType
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:1093
TMVA::MethodDL::fXInputBuffer
HostBufferImpl_t fXInputBuffer
Definition: MethodDL.h:203
TMVA::MethodDL::MatrixImpl_t
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:107
TString.h
REGISTER_METHOD
#define REGISTER_METHOD(CLASS)
for example
Definition: ClassifierFactory.h:124
bool
TString::ReplaceAll
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:692
TMVA::MethodDL::SetBatchWidth
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:290
TMVA::Tools::xmlengine
TXMLEngine & xmlengine()
Definition: Tools.h:268
TString::kBoth
@ kBoth
Definition: TString.h:267
TMVA::DNN::TAdagrad
Adagrad Optimizer class.
Definition: Adagrad.h:45
TMVA::MethodBase::DataInfo
DataSetInfo & DataInfo() const
Definition: MethodBase.h:410
DLMinimizers.h
Cuda.h
TString::ToUpper
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1160
TObjString
Collectable string class.
Definition: TObjString.h:28
TMVA::TTrainingSettings::weightDecay
Double_t weightDecay
Definition: MethodDL.h:82
TMVA::Tools::AddAttr
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:353
TMVA::MethodDL::GetLossFunction
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:266
TMVA::TTrainingSettings::batchSize
size_t batchSize
Definition: MethodDL.h:73
R
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
TMVA::MethodDL::ParseKeyValueString
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:1054
TMVA::DNN::VGeneralLayer
Generic General Layer class.
Definition: GeneralLayer.h:51
TMVA::MethodDL::ParseInputLayout
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:439
TMVA::DataSetInfo
Class that contains all the data information.
Definition: DataSetInfo.h:62
TensorDataLoader.h
TMVA::DNN::TAdam
Adam Optimizer class.
Definition: Adam.h:45
TString::Tokenize
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2219
TMVA::MethodBase::fInteractive
IPythonInteractive * fInteractive
temporary dataset used when evaluating on a different data (used by MethodCategory::GetMvaValues)
Definition: MethodBase.h:448
TMVA::Timer::GetElapsedTime
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:146
TMVA::DNN::TRMSProp
RMSProp Optimizer class.
Definition: RMSProp.h:45
TMVA::MethodBase::GetMethodName
const TString & GetMethodName() const
Definition: MethodBase.h:331
TMVA::MethodDL::ERecurrentLayerType
ERecurrentLayerType
Definition: MethodDL.h:153
Timer.h
TMVA::Event::GetValues
std::vector< Float_t > & GetValues()
Definition: Event.h:94
TString::kTrailing
@ kTrailing
Definition: TString.h:267
TMVA::DNN::EActivationFunction::kFastTanh
@ kFastTanh
TMVA::Types::EAnalysisType
EAnalysisType
Definition: Types.h:128
TNamed::Print
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Definition: TNamed.cxx:128
TMath::IsNaN
Bool_t IsNaN(Double_t x)
Definition: TMath.h:892
TMVA::Event::GetNVariables
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:308
epsilon
REAL epsilon
Definition: triangle.c:617
TMVA::DNN::EActivationFunction::kSymmRelu
@ kSymmRelu
TMVA::TrainingHistory::AddValue
void AddValue(TString Property, Int_t stage, Double_t value)
Definition: TrainingHistory.cxx:47
TMVA::DataSet::GetNEvents
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:206
TMVA::DNN::ERegularization::kL2
@ kL2
TMVA::gConfig
Config & gConfig()
TMVA::MethodDL::GetTrainingSettings
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:276
TMVA::DNN::VGeneralLayer::Initialize
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:395
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:101
TMVA::MethodDL::ParseBatchLayout
void ParseBatchLayout()
Parse the input layout.
Definition: MethodDL.cxx:482
TMVA::MethodDL::SetBatchDepth
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:288
TMVA::MethodDL::SetInputWidth
void SetInputWidth(int inputWidth)
Definition: MethodDL.h:284
TMVA::DNN::EActivationFunction::kRelu
@ kRelu
SGD.h
TMVA::TTrainingSettings::regularization
DNN::ERegularization regularization
Definition: MethodDL.h:77
TMVA::Tools::ReadAttr
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
TFormula::Eval
Double_t Eval(Double_t x) const
TString::First
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:520
TMVA::Types::kClassification
@ kClassification
Definition: Types.h:129
TMVA::MethodDL::GetRegressionValues
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDL.cxx:1959
TMVA::TTrainingSettings::optimizerParams
std::map< TString, double > optimizerParams
Definition: MethodDL.h:84
TMVA::MethodDL::ReadWeightsFromStream
virtual void ReadWeightsFromStream(std::istream &)=0
Methods for writing and reading weights.
Adagrad.h
UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:46
TMVA::Tools::StringFromInt
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1235
TMVA::MethodDL::ProcessOptions
void ProcessOptions()
Definition: MethodDL.cxx:219
TMVA::DNN::TAdadelta
Adadelta Optimizer class.
Definition: Adadelta.h:45
TMVA::DNN::EOutputFunction::kIdentity
@ kIdentity
TMVA::MethodBase
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
TCudnn.h
TMVA::Types
Singleton class for Global types used by TMVA.
Definition: Types.h:73
TMVA::MethodDL::fNet
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:205
TMVA::DNN::EOutputFunction::kSoftmax
@ kSoftmax
TMVA::MethodBase::GetEventCollection
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
Definition: MethodBase.cxx:3351
Types.h
Configurable.h
TMVA::DNN::EOptimizer::kRMSProp
@ kRMSProp
TObject::Warning
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:879
TMVA::MethodDL::ParseDenseLayer
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:583
TMVA::TTrainingSettings::optimizer
DNN::EOptimizer optimizer
Definition: MethodDL.h:78
TMVA::MethodDL::KeyValueVector_t
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:93
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
unsigned int
TMVA::Timer
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TMVA::Types::kTraining
@ kTraining
Definition: Types.h:145
TMVA::DNN::EInitialization
EInitialization
Definition: Functions.h:72
TMVA::TTrainingSettings::convergenceSteps
size_t convergenceSteps
Definition: MethodDL.h:75
TMVA::DNN::ELossFunction
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:57
Functions.h
TMVA::DNN::TDeepNet::AddMaxPoolLayer
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:485
MethodDL.h
TMVA::DataSet::SetCurrentEvent
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:88
TMVA::DNN::EOptimizer::kAdadelta
@ kAdadelta
TString::IsNull
Bool_t IsNull() const
Definition: TString.h:407
Double_t
double Double_t
Definition: RtypesCore.h:59
R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:118
TMVA::MethodDL::ParseBatchNormLayer
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:892
t1
auto * t1
Definition: textangle.C:20
TMVA::TTrainingSettings::momentum
Double_t momentum
Definition: MethodDL.h:81
TMVA::kFATAL
@ kFATAL
Definition: Types.h:63
TMVA::MethodDL::MethodDL
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:1021
TMVA::Tools::GetNextChild
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
TMVA::DNN::EInitialization::kGauss
@ kGauss
TMVA::MethodBase::GetName
const char * GetName() const
Definition: MethodBase.h:334
TMVA::DNN::ELossFunction::kSoftmaxCrossEntropy
@ kSoftmaxCrossEntropy
TMVA::Event
Definition: Event.h:51
RMSProp.h
TMVA::kDEBUG
@ kDEBUG
Definition: Types.h:58
TMVA::MethodBase::GetEvent
const Event * GetEvent() const
Definition: MethodBase.h:751
TMVA::MethodDL::GetBatchDepth
size_t GetBatchDepth() const
Definition: MethodDL.h:258
ROOT::Math::Chebyshev::T
double T(double x)
Definition: ChebyshevPol.h:34
TMVA::DNN::TDeepNet::AddBasicGRULayer
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:608
TMVA::MethodDL::GetInputDepth
size_t GetInputDepth() const
Definition: MethodDL.h:251
TMVA::kHEADER
@ kHEADER
Definition: Types.h:65
TMVA::DNN::EActivationFunction::kSigmoid
@ kSigmoid
TMVA::DNN::EActivationFunction::kIdentity
@ kIdentity
I
#define I(x, y, z)
TIter
Definition: TCollection.h:233
TMVA::MethodBase::fTrainHistory
TrainingHistory fTrainHistory
Definition: MethodBase.h:425
TMVA::MethodDL
Definition: MethodDL.h:89
TMVA::MethodDL::GetWeightInitialization
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:264
TMVA::kINFO
@ kINFO
Definition: Types.h:60
TMVA::TTrainingSettings::testInterval
size_t testInterval
Definition: MethodDL.h:74
TMVA::MethodDL::ParseConvLayer
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:671
TMVA::DNN::TCpu::CreateTensor
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition: Cpu.h:108
TMVA::MethodDL::GetMvaValues
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
Definition: MethodDL.cxx:2023
Tools.h
TMVA::DNN::TCpu::IsCudnn
static bool IsCudnn()
Definition: Cpu.h:131
ClassifierFactory.h
type
int type
Definition: TGX11.cxx:121
TMVA::MethodDL::fRandomSeed
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:182
TMVA::MethodDL::SetBatchSize
void SetBatchSize(size_t batchSize)
Definition: MethodDL.h:287
TMVA::MethodDL::GetBatchHeight
size_t GetBatchHeight() const
Definition: MethodDL.h:259
TXMLEngine::GetNodeName
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
Definition: TXMLEngine.cxx:1075
TMVA::gTools
Tools & gTools()
TMVA::MethodDL::GetInputWidth
size_t GetInputWidth() const
Definition: MethodDL.h:253
TMVA::MethodDL::GetOutputFunction
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:265
TMVA::MethodDL::fXInput
TensorImpl_t fXInput
Definition: MethodDL.h:202
TMVA::TTrainingSettings
All of the options that can be specified in the training string.
Definition: MethodDL.h:72
TMVA::MethodDL::GetNumValidationSamples
UInt_t GetNumValidationSamples()
parce the validation string and return the number of event data used for validation
TMVA::DNN::EActivationFunction
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
TMVA::DNN::TDeepNet::AddBasicLSTMLayer
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:567
TMVA::MethodDL::TensorImpl_t
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition: MethodDL.h:108
TMVA::kWARNING
@ kWARNING
Definition: Types.h:61
TMVA::DNN::TDeepNet
Generic Deep Neural Network class.
Definition: DeepNet.h:73
TMVA::DNN::EOptimizer::kAdagrad
@ kAdagrad
TMVA::fetchValueTmp
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDL.cxx:75
TMVA::DNN::CNN
Definition: ContextHandles.h:43
TMath.h
TMVA::TTrainingSettings::dropoutProbabilities
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:83
TMVA::Event::GetNTargets
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:319
TMVA::DNN::TSGD
Stochastic Batch Gradient Descent Optimizer class.
Definition: SGD.h:46
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
int
TMVA::Config::GetNCpu
UInt_t GetNCpu()
Definition: Config.h:72
TMVA::MethodDL::ReadWeightsFromXML
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:2121