Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodDL.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/cnn:$Id$Ndl
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (see tmva/doc/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31#include "TObjString.h"
32
33#include "TMVA/Tools.h"
34#include "TMVA/Configurable.h"
35#include "TMVA/IMethod.h"
37#include "TMVA/MethodDL.h"
38#include "TMVA/Types.h"
40#include "TMVA/DNN/Functions.h"
42#include "TMVA/DNN/SGD.h"
43#include "TMVA/DNN/Adam.h"
44#include "TMVA/DNN/Adagrad.h"
45#include "TMVA/DNN/RMSProp.h"
46#include "TMVA/DNN/Adadelta.h"
47#include "TMVA/Timer.h"
48
49#ifdef R__HAS_TMVAGPU
51#ifdef R__HAS_CUDNN
53#endif
54#endif
55
56#include <chrono>
57
60
61using namespace TMVA::DNN::CNN;
62using namespace TMVA::DNN;
63
69
70
71namespace TMVA {
72
73
74////////////////////////////////////////////////////////////////////////////////
75TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
76{
77 key.ToUpper();
78 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
79 if (it == keyValueMap.end()) {
80 return TString("");
81 }
82 return it->second;
83}
84
85////////////////////////////////////////////////////////////////////////////////
86template <typename T>
87T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
88
89////////////////////////////////////////////////////////////////////////////////
90template <>
91int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
92{
93 TString value(fetchValueTmp(keyValueMap, key));
94 if (value == "") {
95 return defaultValue;
96 }
97 return value.Atoi();
98}
99
100////////////////////////////////////////////////////////////////////////////////
101template <>
102double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
103{
104 TString value(fetchValueTmp(keyValueMap, key));
105 if (value == "") {
106 return defaultValue;
107 }
108 return value.Atof();
109}
110
111////////////////////////////////////////////////////////////////////////////////
112template <>
113TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
114{
115 TString value(fetchValueTmp(keyValueMap, key));
116 if (value == "") {
117 return defaultValue;
118 }
119 return value;
120}
121
122////////////////////////////////////////////////////////////////////////////////
123template <>
124bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
125{
126 TString value(fetchValueTmp(keyValueMap, key));
127 if (value == "") {
128 return defaultValue;
129 }
130
131 value.ToUpper();
132 if (value == "TRUE" || value == "T" || value == "1") {
133 return true;
134 }
135
136 return false;
137}
138
139////////////////////////////////////////////////////////////////////////////////
140template <>
141std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
142 std::vector<double> defaultValue)
143{
144 TString parseString(fetchValueTmp(keyValueMap, key));
145 if (parseString == "") {
146 return defaultValue;
147 }
148
149 parseString.ToUpper();
150 std::vector<double> values;
151
152 const TString tokenDelim("+");
153 TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
154 TIter nextToken(tokenStrings);
155 TObjString *tokenString = (TObjString *)nextToken();
156 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
157 std::stringstream sstr;
158 double currentValue;
159 sstr << tokenString->GetString().Data();
160 sstr >> currentValue;
161 values.push_back(currentValue);
162 }
163 return values;
164}
165
166////////////////////////////////////////////////////////////////////////////////
168{
169 // Set default values for all option strings
170
171 DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
172
173 DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
174
175 DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
176
177 DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
178 " or cross entropy (binary classification).");
179 AddPreDefVal(TString("CROSSENTROPY"));
180 AddPreDefVal(TString("SUMOFSQUARES"));
181 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
182
183 DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
184 AddPreDefVal(TString("XAVIER"));
185 AddPreDefVal(TString("XAVIERUNIFORM"));
186 AddPreDefVal(TString("GAUSS"));
187 AddPreDefVal(TString("UNIFORM"));
188 AddPreDefVal(TString("IDENTITY"));
189 AddPreDefVal(TString("ZERO"));
190
191 DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
192
193 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
194 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
195 "Specify as 100 to use exactly 100 events. (Default: 20%)");
196
197 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
198 AddPreDefVal(TString("STANDARD")); // deprecated and not supported anymore
199 AddPreDefVal(TString("CPU"));
200 AddPreDefVal(TString("GPU"));
201 AddPreDefVal(TString("OPENCL")); // not yet implemented
202 AddPreDefVal(TString("CUDNN")); // not needed (by default GPU is now CUDNN if available)
203
204 // define training strategy separated by a separator "|"
205 DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-3,"
206 "Momentum=0.0,"
207 "ConvergenceSteps=100,"
208 "MaxEpochs=2000,"
209 "Optimizer=ADAM,"
210 "BatchSize=30,"
211 "TestRepetitions=1,"
212 "WeightDecay=0.0,"
213 "Regularization=None,"
214 "DropConfig=0.0",
215 "TrainingStrategy", "Defines the training strategies.");
216}
217
218////////////////////////////////////////////////////////////////////////////////
220{
 // Turns the option strings into concrete settings. As far as this listing shows, it:
 //  - normalizes fArchitectureString (STANDARD -> CPU, OPENCL -> GPU, GPU/CUDNN -> CPU
 //    when the build has no GPU support),
 //  - picks fLossFunction / fOutputFunction from fErrorStrategy and fAnalysisType,
 //  - parses each training-strategy block into a TTrainingSettings appended to fTrainingSettings,
 //  - sets the batch size and fills default input-shape / batch-layout values.
 // NOTE(review): this listing has gaps (the embedded numbers jump, e.g. 221->223, 274->277,
 // 279->281, 312->314, 329->331). Several statements, including some opening `if` lines and
 // all assignments inside the initialization/regularization/optimizer branches, are not visible.
221
 // NOTE(review): the condition that opens this block is missing from the listing.
223 Log() << kINFO << "Will ignore negative events in training!" << Endl;
224 }
225
 // NOTE(review): the message below names "Architecture=CPU" twice; one of them was
 // probably meant to be GPU - confirm before changing the runtime string.
226 if (fArchitectureString == "STANDARD") {
227 Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
228 "Please use Architecture=CPU or Architecture=CPU."
229 "See the TMVA Users' Guide for instructions if you "
230 "encounter problems."
231 << Endl;
232 Log() << kINFO << "We will use instead the CPU architecture" << Endl;
233 fArchitectureString = "CPU";
234 }
 // OPENCL is redirected to GPU; the GPU check further down may in turn fall back to CPU.
235 if (fArchitectureString == "OPENCL") {
236 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
237 "Please use Architecture=CPU or Architecture=CPU for the "
238 "time being. See the TMVA Users' Guide for instructions "
239 "if you encounter problems."
240 << Endl;
241 // use instead GPU
242 Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
243 fArchitectureString = "GPU";
244 }
245
246 // the architecture can now be set at runtime as an option
247
248
 // GPU and CUDNN are handled identically here: accepted when R__HAS_TMVAGPU is defined,
 // otherwise an error is logged and the architecture is reset to CPU.
249 if (fArchitectureString == "GPU" || fArchitectureString == "CUDNN") {
250#ifdef R__HAS_TMVAGPU
251 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
252#else // case TMVA does not support GPU
253 Log() << kERROR << "CUDA backend not enabled. Please make sure "
254 "you have CUDA installed and it was successfully "
255 "detected by CMAKE by using -Dtmva-gpu=On "
256 << Endl;
257 fArchitectureString = "CPU";
258 Log() << kINFO << "Will now use instead the CPU architecture !" << Endl;
259#endif
260 }
261
 // The CPU branch only logs; it does not change any setting.
262 if (fArchitectureString == "CPU") {
263#ifdef R__HAS_TMVACPU // TMVA has CPU BLAS and IMT support
264 Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support !" << Endl;
265#else // TMVA has no CPU BLAS or IMT support
266 Log() << kINFO << "Multi-core CPU backend not enabled. For better performances, make sure "
267 "you have a BLAS implementation and it was successfully "
268 "detected by CMake as well that the imt CMake flag is set."
269 << Endl;
270 Log() << kINFO << "Will use anyway the CPU architecture but with slower performance" << Endl;
271#endif
272 }
273
274 // Input Layout
 // NOTE(review): the statements that followed the "Input Layout" comment are missing from the listing.
277
278 // Loss function and output.
279 fOutputFunction = EOutputFunction::kSigmoid;
 // NOTE(review): the first branch of this if/else-if chain lost its opening line; judging by the
 // later branches (kRegression, kMulticlass) it presumably tests the classification case - confirm.
 // In the visible branches, an fErrorStrategy that matches none of the tested strings leaves
 // fLossFunction unchanged.
281 if (fErrorStrategy == "SUMOFSQUARES") {
282 fLossFunction = ELossFunction::kMeanSquaredError;
283 }
284 if (fErrorStrategy == "CROSSENTROPY") {
285 fLossFunction = ELossFunction::kCrossEntropy;
286 }
287 fOutputFunction = EOutputFunction::kSigmoid;
288 } else if (fAnalysisType == Types::kRegression) {
 // Regression always ends up with mean squared error and an identity output;
 // any other requested error strategy only triggers the warning.
289 if (fErrorStrategy != "SUMOFSQUARES") {
290 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
291 << " neural net error function. Setting error function to "
292 << " SUMOFSQUARES now." << Endl;
293 }
294
295 fLossFunction = ELossFunction::kMeanSquaredError;
296 fOutputFunction = EOutputFunction::kIdentity;
297 } else if (fAnalysisType == Types::kMulticlass) {
298 if (fErrorStrategy == "SUMOFSQUARES") {
299 fLossFunction = ELossFunction::kMeanSquaredError;
300 }
301 if (fErrorStrategy == "CROSSENTROPY") {
302 fLossFunction = ELossFunction::kCrossEntropy;
303 }
304 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
305 fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
306 }
307 fOutputFunction = EOutputFunction::kSoftmax;
308 }
309
310 // Initialization
311 // the biases will be always initialized to zero
 // NOTE(review): the body of every branch below is missing from the listing (odd-numbered lines
 // 313..325 are absent), so the value assigned per initialization string cannot be seen here.
312 if (fWeightInitializationString == "XAVIER") {
314 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
316 } else if (fWeightInitializationString == "GAUSS") {
318 } else if (fWeightInitializationString == "UNIFORM") {
320 } else if (fWeightInitializationString == "ZERO") {
322 } else if (fWeightInitializationString == "IDENTITY") {
324 } else {
326 }
327
328 // Training settings.
329
 // NOTE(review): the definition of strategyKeyValues is missing from the listing. The loop treats
 // each element as a key/value map accepted by fetchValueTmp.
 // The fallback values passed to fetchValueTmp below differ from the defaults in the
 // TrainingStrategy option string for TestRepetitions (7 vs 1), LearningRate (1e-5 vs 1e-3)
 // and Momentum (0.3 vs 0.0); they apply only when a key is absent from a block.
331 for (auto &block : strategyKeyValues) {
332 TTrainingSettings settings;
333
334 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
335 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
336 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
337 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
338 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
339 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
340 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
341 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
342
 // NOTE(review): the assignments inside the regularization branches are missing from the listing.
343 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
344 if (regularization == "L1") {
346 } else if (regularization == "L2") {
348 } else {
350 }
351
 // NOTE(review): the assignments inside the optimizer branches are missing from the listing;
 // only the fallback of optimizerName to "ADAM" for unknown names is visible.
352 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
353 settings.optimizerName = optimizer;
354 if (optimizer == "SGD") {
356 } else if (optimizer == "ADAM") {
358 } else if (optimizer == "ADAGRAD") {
360 } else if (optimizer == "RMSPROP") {
362 } else if (optimizer == "ADADELTA") {
364 } else {
365 // Make Adam as default choice if the input string is
366 // incorrect.
368 settings.optimizerName = "ADAM";
369 }
370 // check for specific optimizer parameters
 // Parameter keys are built as <optimizerName><label>, e.g. "ADAM_beta1". Only combinations
 // present in defaultValues are looked up, so SGD (no entries) gets no optimizerParams.
371 std::vector<TString> optimParamLabels = {"_beta1", "_beta2", "_eps", "_rho"};
372 //default values
373 std::map<TString, double> defaultValues = {
374 {"ADADELTA_eps", 1.E-8}, {"ADADELTA_rho", 0.95},
375 {"ADAGRAD_eps", 1.E-8},
376 {"ADAM_beta1", 0.9}, {"ADAM_beta2", 0.999}, {"ADAM_eps", 1.E-7},
377 {"RMSPROP_eps", 1.E-7}, {"RMSPROP_rho", 0.9},
378 };
379 for (auto &pN : optimParamLabels) {
380 TString optimParamName = settings.optimizerName + pN;
381 // check if optimizer has default values for this specific parameters
382 if (defaultValues.count(optimParamName) > 0) {
383 double defValue = defaultValues[optimParamName];
 // NOTE(review): fetchValueTmp upper-cases the key before the lookup, so the block is
 // searched for e.g. "ADAM_BETA1"; the settings entry keeps the mixed-case name.
384 double val = fetchValueTmp(block, optimParamName, defValue);
385 // create entry in settings for this optimizer parameter
386 settings.optimizerParams[optimParamName] = val;
387 }
388 }
389
390 fTrainingSettings.push_back(settings);
391 }
392
 // NOTE(review): front() requires at least one training-strategy block; nothing visible here
 // guards against an empty fTrainingSettings.
393 // this set fInputShape[0] = batchSize
394 this->SetBatchSize(fTrainingSettings.front().batchSize);
395
396 // case inputlayout and batch layout was not given. Use default then
397 // (1, batchsize, nvariables)
398 // fInputShape[0] -> BatchSize
399 // fInputShape[1] -> InputDepth
400 // fInputShape[2] -> InputHeight
401 // fInputShape[3] -> InputWidth
402 if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
403 fInputShape[1] = 1;
404 fInputShape[2] = 1;
 // NOTE(review): line 405 is missing from the listing; the assignment of fInputShape[3] is not visible.
406 }
407 // case when batch layout is not provided (all zero)
408 // batch layout can be determined by the input layout + batch size
409 // case DNN : { 1, B, W }
410 // case CNN : { B, C, H*W}
411 // case RNN : { B, T, H*W }
412
413 if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
414 // case first layer is DENSE
415 if (fInputShape[2] == 1 && fInputShape[1] == 1) {
416 // case of (1, batchsize, input features)
417 fBatchDepth = 1;
418 fBatchHeight = fTrainingSettings.front().batchSize;
 // NOTE(review): line 419 is missing; the fBatchWidth assignment for this case is not visible.
420 }
421 else { // more general cases (e.g. for CNN)
422 // case CONV or RNN
423 fBatchDepth = fTrainingSettings.front().batchSize;
 // NOTE(review): lines 424-425 are missing; the remaining batch dimensions are not visible.
426 }
427 }
428}
429
430////////////////////////////////////////////////////////////////////////////////
431/// default initializations
433{
434 // Nothing to do here
435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// Parse the input layout
440{
441 // Define the delimiter
442 const TString delim("|");
443
444 // Get the input layout string
445 TString inputLayoutString = this->GetInputLayoutString();
446
447 // Split the input layout string
448 TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
449 TIter nextInputDim(inputDimStrings);
450 TObjString *inputDimString = (TObjString *)nextInputDim();
451
452 // Go through every token and save its absolute value in the shape array
453 // The first token is the batch size for easy compatibility with cudnn
454 int subDim = 1;
455 std::vector<size_t> inputShape;
456 inputShape.reserve(inputLayoutString.Length()/2 + 2);
457 inputShape.push_back(0); // Will be set later by Trainingsettings, use 0 value now
458 for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
459 // size_t is unsigned
460 subDim = (size_t) abs(inputDimString->GetString().Atoi());
461 // Size among unused dimensions should be set to 1 for cudnn
462 //if (subDim == 0) subDim = 1;
463 inputShape.push_back(subDim);
464 }
465 // it is expected that empty Shape has at least 4 dimensions. We pad the missing one's with 1
466 // for example in case of dense layer input layouts
467 // when we will support 3D convolutions we would need to add extra 1's
468 if (inputShape.size() == 2) {
469 // case of dense layer where only width is specified
470 inputShape = {inputShape[0], 1, 1, inputShape[1]};
471 }
472 else if (inputShape.size() == 3) {
473 //e.g. case of RNN T,W -> T,1,W
474 inputShape = {inputShape[0], inputShape[1], 1, inputShape[2]};
475 }
476
477 this->SetInputShape(inputShape);
478}
479
480////////////////////////////////////////////////////////////////////////////////
/// Parse the batch layout
483{
484 // Define the delimiter
485 const TString delim("|");
486
487 // Get the input layout string
488 TString batchLayoutString = this->GetBatchLayoutString();
489
490 size_t batchDepth = 0;
491 size_t batchHeight = 0;
492 size_t batchWidth = 0;
493
494 // Split the input layout string
495 TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
496 TIter nextBatchDim(batchDimStrings);
497 TObjString *batchDimString = (TObjString *)nextBatchDim();
498 int idxToken = 0;
499
500 for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
501 switch (idxToken) {
502 case 0: // input depth
503 {
504 TString strDepth(batchDimString->GetString());
505 batchDepth = (size_t)strDepth.Atoi();
506 } break;
507 case 1: // input height
508 {
509 TString strHeight(batchDimString->GetString());
510 batchHeight = (size_t)strHeight.Atoi();
511 } break;
512 case 2: // input width
513 {
514 TString strWidth(batchDimString->GetString());
515 batchWidth = (size_t)strWidth.Atoi();
516 } break;
517 }
518 ++idxToken;
519 }
520
521 this->SetBatchDepth(batchDepth);
522 this->SetBatchHeight(batchHeight);
523 this->SetBatchWidth(batchWidth);
524}
525
526////////////////////////////////////////////////////////////////////////////////
527/// Create a deep net based on the layout string
528template <typename Architecture_t, typename Layer_t>
531{
532 // Layer specification, layer details
533 const TString layerDelimiter(",");
534 const TString subDelimiter("|");
535
536 TString layoutString = this->GetLayoutString();
537
538 //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
539
540 // Split layers
541 TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
542 TIter nextLayer(layerStrings);
543 TObjString *layerString = (TObjString *)nextLayer();
544
545
546 for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
547
548 // Split layer details
549 TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
550 TIter nextToken(subStrings);
551 TObjString *token = (TObjString *)nextToken();
552
553 // Determine the type of the layer
554 TString strLayerType = token->GetString();
555
556
557 if (strLayerType == "DENSE") {
558 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
559 } else if (strLayerType == "CONV") {
560 ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
561 } else if (strLayerType == "MAXPOOL") {
562 ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
563 } else if (strLayerType == "RESHAPE") {
564 ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
565 } else if (strLayerType == "BNORM") {
566 ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
567 } else if (strLayerType == "RNN") {
568 ParseRecurrentLayer(kLayerRNN, deepNet, nets, layerString->GetString(), subDelimiter);
569 } else if (strLayerType == "LSTM") {
570 ParseRecurrentLayer(kLayerLSTM, deepNet, nets, layerString->GetString(), subDelimiter);
571 } else if (strLayerType == "GRU") {
572 ParseRecurrentLayer(kLayerGRU, deepNet, nets, layerString->GetString(), subDelimiter);
573 } else {
574 // no type of layer specified - assume is dense layer as in old DNN interface
575 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
576 }
577 }
578}
579
580////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate dense layer
582template <typename Architecture_t, typename Layer_t>
584 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
585 TString delim)
586{
 // Adds one dense layer described by layerString to deepNet (and to fNet when fBuildNet is set).
 // layerString is split on delim; each token is either the literal "DENSE" (skipped), an
 // activation-function keyword, or - for the first unmatched token while width is still 0 -
 // an expression for the layer width.
 // NOTE(review): the array returned by Tokenize is never deleted in this function. ROOT documents
 // that array as owned by the caller, so this looks like a leak per call - confirm and fix.
 // NOTE(review): this listing has gaps (embedded numbers jump 582->584 and 640->642).
587 int width = 0;
588 EActivationFunction activationFunction = EActivationFunction::kTanh;
589
590 // this return number of input variables for the method
591 // it can be used to deduce width of dense layer if specified as N+10
592 // where N is the number of input variables
593 const size_t inputSize = GetNvar();
594
595 // Split layer details
596 TObjArray *subStrings = layerString.Tokenize(delim);
597 TIter nextToken(subStrings);
598 TObjString *token = (TObjString *)nextToken();
599
600 // loop on the tokens
601 // order of sepcifying width and activation function is not relevant
602 // both 100|TANH and TANH|100 are valid cases
603 for (; token != nullptr; token = (TObjString *)nextToken()) {
604 // try a match with the activation function
605 TString strActFnc(token->GetString());
606 // if first token defines the layer type- skip it
607 if (strActFnc =="DENSE") continue;
608
 // Keyword comparison is exact; an unrecognized token falls through to the width branch.
609 if (strActFnc == "RELU") {
610 activationFunction = DNN::EActivationFunction::kRelu;
611 } else if (strActFnc == "TANH") {
612 activationFunction = DNN::EActivationFunction::kTanh;
613 } else if (strActFnc == "FTANH") {
614 activationFunction = DNN::EActivationFunction::kFastTanh;
615 } else if (strActFnc == "SYMMRELU") {
616 activationFunction = DNN::EActivationFunction::kSymmRelu;
617 } else if (strActFnc == "SOFTSIGN") {
618 activationFunction = DNN::EActivationFunction::kSoftSign;
619 } else if (strActFnc == "SIGMOID") {
620 activationFunction = DNN::EActivationFunction::kSigmoid;
621 } else if (strActFnc == "LINEAR") {
622 activationFunction = DNN::EActivationFunction::kIdentity;
623 } else if (strActFnc == "GAUSS") {
624 activationFunction = DNN::EActivationFunction::kGauss;
625 } else if (width == 0) {
626 // no match found try to parse as text showing the width
627 // support for input a formula where the variable 'x' is 'N' in the string
628 // use TFormula for the evaluation
629 TString strNumNodes = strActFnc;
630 // number of nodes
631 TString strN("x");
 // Every 'N' and 'n' in the token is replaced by 'x', the formula variable.
632 strNumNodes.ReplaceAll("N", strN);
633 strNumNodes.ReplaceAll("n", strN);
634 TFormula fml("tmp", strNumNodes);
 // The formula is evaluated at x = number of input variables; the result is stored in an
 // int (presumably truncating a floating-point value - TODO confirm Eval's return type).
635 width = fml.Eval(inputSize);
636 }
637 }
638 // avoid zero width. assume is last layer and give width = output width
639 // Determine the number of outputs
640 size_t outputSize = 1;
 // NOTE(review): the `if (...) {` line that opens this branch is missing from the listing.
642 outputSize = GetNTargets();
643 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
644 outputSize = DataInfo().GetNClasses();
645 }
646 if (width == 0) width = outputSize;
647
648 // Add the dense layer, initialize the weights and biases and copy
649 TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
650 denseLayer->Initialize();
651
652 // add same layer to fNet
653 if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
654
655 //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
656
657 // add the copy to all slave nets
658 //for (size_t i = 0; i < nets.size(); i++) {
659 // nets[i].AddDenseLayer(copyDenseLayer);
660 //}
661
662 // check compatibility of added layer
663 // for a dense layer input should be 1 x 1 x DxHxW
664}
665
666////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate convolutional layer
668template <typename Architecture_t, typename Layer_t>
670 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
671 TString delim)
672{
673 int depth = 0;
674 int fltHeight = 0;
675 int fltWidth = 0;
676 int strideRows = 0;
677 int strideCols = 0;
678 int zeroPadHeight = 0;
679 int zeroPadWidth = 0;
680 EActivationFunction activationFunction = EActivationFunction::kTanh;
681
682 // Split layer details
683 TObjArray *subStrings = layerString.Tokenize(delim);
684 TIter nextToken(subStrings);
685 TObjString *token = (TObjString *)nextToken();
686 int idxToken = 0;
687
688 for (; token != nullptr; token = (TObjString *)nextToken()) {
689 switch (idxToken) {
690 case 1: // depth
691 {
692 TString strDepth(token->GetString());
693 depth = strDepth.Atoi();
694 } break;
695 case 2: // filter height
696 {
697 TString strFltHeight(token->GetString());
698 fltHeight = strFltHeight.Atoi();
699 } break;
700 case 3: // filter width
701 {
702 TString strFltWidth(token->GetString());
703 fltWidth = strFltWidth.Atoi();
704 } break;
705 case 4: // stride in rows
706 {
707 TString strStrideRows(token->GetString());
708 strideRows = strStrideRows.Atoi();
709 } break;
710 case 5: // stride in cols
711 {
712 TString strStrideCols(token->GetString());
713 strideCols = strStrideCols.Atoi();
714 } break;
715 case 6: // zero padding height
716 {
717 TString strZeroPadHeight(token->GetString());
718 zeroPadHeight = strZeroPadHeight.Atoi();
719 } break;
720 case 7: // zero padding width
721 {
722 TString strZeroPadWidth(token->GetString());
723 zeroPadWidth = strZeroPadWidth.Atoi();
724 } break;
725 case 8: // activation function
726 {
727 TString strActFnc(token->GetString());
728 if (strActFnc == "RELU") {
729 activationFunction = DNN::EActivationFunction::kRelu;
730 } else if (strActFnc == "TANH") {
731 activationFunction = DNN::EActivationFunction::kTanh;
732 } else if (strActFnc == "SYMMRELU") {
733 activationFunction = DNN::EActivationFunction::kSymmRelu;
734 } else if (strActFnc == "SOFTSIGN") {
735 activationFunction = DNN::EActivationFunction::kSoftSign;
736 } else if (strActFnc == "SIGMOID") {
737 activationFunction = DNN::EActivationFunction::kSigmoid;
738 } else if (strActFnc == "LINEAR") {
739 activationFunction = DNN::EActivationFunction::kIdentity;
740 } else if (strActFnc == "GAUSS") {
741 activationFunction = DNN::EActivationFunction::kGauss;
742 }
743 } break;
744 }
745 ++idxToken;
746 }
747
748 // Add the convolutional layer, initialize the weights and biases and copy
749 TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
750 zeroPadHeight, zeroPadWidth, activationFunction);
751 convLayer->Initialize();
752
753 // Add same layer to fNet
754 if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
755 zeroPadHeight, zeroPadWidth, activationFunction);
756
757 //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
758
759 //// add the copy to all slave nets
760 //for (size_t i = 0; i < nets.size(); i++) {
761 // nets[i].AddConvLayer(copyConvLayer);
762 //}
763}
764
765////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate max pool layer
767template <typename Architecture_t, typename Layer_t>
769 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
770 TString delim)
771{
772
773 int filterHeight = 0;
774 int filterWidth = 0;
775 int strideRows = 0;
776 int strideCols = 0;
777
778 // Split layer details
779 TObjArray *subStrings = layerString.Tokenize(delim);
780 TIter nextToken(subStrings);
781 TObjString *token = (TObjString *)nextToken();
782 int idxToken = 0;
783
784 for (; token != nullptr; token = (TObjString *)nextToken()) {
785 switch (idxToken) {
786 case 1: // filter height
787 {
788 TString strFrmHeight(token->GetString());
789 filterHeight = strFrmHeight.Atoi();
790 } break;
791 case 2: // filter width
792 {
793 TString strFrmWidth(token->GetString());
794 filterWidth = strFrmWidth.Atoi();
795 } break;
796 case 3: // stride in rows
797 {
798 TString strStrideRows(token->GetString());
799 strideRows = strStrideRows.Atoi();
800 } break;
801 case 4: // stride in cols
802 {
803 TString strStrideCols(token->GetString());
804 strideCols = strStrideCols.Atoi();
805 } break;
806 }
807 ++idxToken;
808 }
809
810 // Add the Max pooling layer
811 // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
812 deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
813
814 // Add the same layer to fNet
815 if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
816
817
818 //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
819
820 //// add the copy to all slave nets
821 //for (size_t i = 0; i < nets.size(); i++) {
822 // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
823 //}
824}
825
826////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate reshape layer
828template <typename Architecture_t, typename Layer_t>
830 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
831 TString delim)
832{
833 int depth = 0;
834 int height = 0;
835 int width = 0;
836 bool flattening = false;
837
838 // Split layer details
839 TObjArray *subStrings = layerString.Tokenize(delim);
840 TIter nextToken(subStrings);
841 TObjString *token = (TObjString *)nextToken();
842 int idxToken = 0;
843
844 for (; token != nullptr; token = (TObjString *)nextToken()) {
845 if (token->GetString() == "FLAT") idxToken=4;
846 switch (idxToken) {
847 case 1: {
848 TString strDepth(token->GetString());
849 depth = strDepth.Atoi();
850 } break;
851 case 2: // height
852 {
853 TString strHeight(token->GetString());
854 height = strHeight.Atoi();
855 } break;
856 case 3: // width
857 {
858 TString strWidth(token->GetString());
859 width = strWidth.Atoi();
860 } break;
861 case 4: // flattening
862 {
863 TString flat(token->GetString());
864 if (flat == "FLAT") {
865 flattening = true;
866 }
867 } break;
868 }
869 ++idxToken;
870 }
871
872 // Add the reshape layer
873 // TReshapeLayer<Architecture_t> *reshapeLayer =
874 deepNet.AddReshapeLayer(depth, height, width, flattening);
875
876 // Add the same layer to fNet
877 if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
878
879 //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
880
881 //// add the copy to all slave nets
882 //for (size_t i = 0; i < nets.size(); i++) {
883 // nets[i].AddReshapeLayer(copyReshapeLayer);
884 //}
885}
886
887////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate batch normalization layer
889template <typename Architecture_t, typename Layer_t>
891 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
892 TString delim)
893{
894
895 // default values
896 double momentum = -1; //0.99;
897 double epsilon = 0.0001;
898
899 // Split layer details
900 TObjArray *subStrings = layerString.Tokenize(delim);
901 TIter nextToken(subStrings);
902 TObjString *token = (TObjString *)nextToken();
903 int idxToken = 0;
904
905 for (; token != nullptr; token = (TObjString *)nextToken()) {
906 switch (idxToken) {
907 case 1: {
908 momentum = std::atof(token->GetString().Data());
909 } break;
910 case 2: // height
911 {
912 epsilon = std::atof(token->GetString().Data());
913 } break;
914 }
915 ++idxToken;
916 }
917
918 // Add the batch norm layer
919 //
920 auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
921 layer->Initialize();
922
923 // Add the same layer to fNet
924 if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
925
926}
927
928////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate rnn layer
930template <typename Architecture_t, typename Layer_t>
932 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
933 TString delim)
934{
935 // int depth = 0;
936 int stateSize = 0;
937 int inputSize = 0;
938 int timeSteps = 0;
939 bool rememberState = false;
940 bool returnSequence = false;
941 bool resetGateAfter = false;
942
943 // Split layer details
944 TObjArray *subStrings = layerString.Tokenize(delim);
945 TIter nextToken(subStrings);
946 TObjString *token = (TObjString *)nextToken();
947 int idxToken = 0;
948
949 for (; token != nullptr; token = (TObjString *)nextToken()) {
950 switch (idxToken) {
951 case 1: // state size
952 {
953 TString strstateSize(token->GetString());
954 stateSize = strstateSize.Atoi();
955 break;
956 }
957 case 2: // input size
958 {
959 TString strinputSize(token->GetString());
960 inputSize = strinputSize.Atoi();
961 break;
962 }
963 case 3: // time steps
964 {
965 TString strtimeSteps(token->GetString());
966 timeSteps = strtimeSteps.Atoi();
967 break;
968 }
969 case 4: // returnSequence (option stateful in Keras)
970 {
971 TString strrememberState(token->GetString());
972 rememberState = (bool) strrememberState.Atoi();
973 break;
974 }
975 case 5: // return full output sequence (1 or 0)
976 {
977 TString str(token->GetString());
978 returnSequence = (bool)str.Atoi();
979 break;
980 }
981 case 6: // resetGate after option (only for GRU)
982 {
983 TString str(token->GetString());
984 resetGateAfter = (bool)str.Atoi();
985 }
986 }
987 ++idxToken;
988 }
989
990 // Add the recurrent layer, initialize the weights and biases and copy
991 if (rnnType == kLayerRNN) {
992 auto * recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
993 recurrentLayer->Initialize();
994 // Add same layer to fNet
995 if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
996 }
997 else if (rnnType == kLayerLSTM ) {
998 auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
999 recurrentLayer->Initialize();
1000 // Add same layer to fNet
1001 if (fBuildNet)
1002 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1003 }
1004 else if (rnnType == kLayerGRU) {
1005 if (Architecture_t::IsCudnn()) resetGateAfter = true; // needed for Cudnn
1006 auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1007 recurrentLayer->Initialize();
1008 // Add same layer to fNet
1009 if (fBuildNet)
1010 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1011 }
1012 else {
1013 Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
1014 }
1015}
1016
1017////////////////////////////////////////////////////////////////////////////////
1018/// Standard constructor.
1019MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
1020 : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1021 fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1022 fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1023 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1024 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1025 fXInput()
1026{
1027 // Nothing to do here
1028}
1029
1030////////////////////////////////////////////////////////////////////////////////
1031/// Constructor from a weight file.
1032MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
1033 : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1034 fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1035 fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1036 fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1037 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1038 fXInput()
1039{
1040 // Nothing to do here
1041}
1042
1043////////////////////////////////////////////////////////////////////////////////
1044/// Destructor.
1046{
1047 // Nothing to do here
1048}
1049
1050////////////////////////////////////////////////////////////////////////////////
1051/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1052auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1053{
1054 // remove empty spaces
1055 parseString.ReplaceAll(" ","");
1056 KeyValueVector_t blockKeyValues;
1057 const TString keyValueDelim("=");
1058
1059 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1060 TIter nextBlock(blockStrings);
1061 TObjString *blockString = (TObjString *)nextBlock();
1062
1063 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1064 blockKeyValues.push_back(std::map<TString, TString>());
1065 std::map<TString, TString> &currentBlock = blockKeyValues.back();
1066
1067 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1068 TIter nextToken(subStrings);
1069 TObjString *token = (TObjString *)nextToken();
1070
1071 for (; token != nullptr; token = (TObjString *)nextToken()) {
1072 TString strKeyValue(token->GetString());
1073 int delimPos = strKeyValue.First(keyValueDelim.Data());
1074 if (delimPos <= 0) continue;
1075
1076 TString strKey = TString(strKeyValue(0, delimPos));
1077 strKey.ToUpper();
1078 TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1079
1080 strKey.Strip(TString::kBoth, ' ');
1081 strValue.Strip(TString::kBoth, ' ');
1082
1083 currentBlock.insert(std::make_pair(strKey, strValue));
1084 }
1085 }
1086 return blockKeyValues;
1087}
1088
1089////////////////////////////////////////////////////////////////////////////////
1090/// What kind of analysis type can handle the CNN
1092{
1093 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1094 if (type == Types::kMulticlass) return kTRUE;
1095 if (type == Types::kRegression) return kTRUE;
1096
1097 return kFALSE;
1098}
1099
1100////////////////////////////////////////////////////////////////////////////////
1101/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1102/// 100 etc.
1103/// - 20% and 0.2 selects 20% of the training set as validation data.
1104/// - 100 selects 100 events as the validation data.
1105///
1106/// @return number of samples in validation set
1107///
1109{
1110 Int_t nValidationSamples = 0;
1111 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1112
1113 // Parsing + Validation
1114 // --------------------
1115 if (fNumValidationString.EndsWith("%")) {
1116 // Relative spec. format 20%
1117 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1118
1119 if (intValStr.IsFloat()) {
1120 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1121 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1122 } else {
1123 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1124 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1125 }
1126 } else if (fNumValidationString.IsFloat()) {
1127 Double_t valSizeAsDouble = fNumValidationString.Atof();
1128
1129 if (valSizeAsDouble < 1.0) {
1130 // Relative spec. format 0.2
1131 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1132 } else {
1133 // Absolute spec format 100 or 100.0
1134 nValidationSamples = valSizeAsDouble;
1135 }
1136 } else {
1137 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1138 << Endl;
1139 }
1140
1141 // Value validation
1142 // ----------------
1143 if (nValidationSamples < 0) {
1144 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1145 }
1146
1147 if (nValidationSamples == 0) {
1148 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1149 }
1150
1151 if (nValidationSamples >= (Int_t)trainingSetSize) {
1152 Log() << kFATAL << "Validation size \"" << fNumValidationString
1153 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1154 }
1155
1156 return nValidationSamples;
1157}
1158
1159
1160////////////////////////////////////////////////////////////////////////////////
1161/// Implementation of architecture specific train method
1162///
1163template <typename Architecture_t>
1165{
1166
1167 using Scalar_t = typename Architecture_t::Scalar_t;
1170 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1171
1172 bool debug = Log().GetMinType() == kDEBUG;
1173
1174
1175 // set the random seed for weight initialization
1176 Architecture_t::SetRandomSeed(fRandomSeed);
1177
1178 ///split training data in training and validation data
1179 // and determine the number of training and testing examples
1180
1181 size_t nValidationSamples = GetNumValidationSamples();
1182 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1183
1184 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1185 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1186 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1187
1188 size_t trainingPhase = 1;
1189
1190 for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1191
1192 size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1193
1194
1195 // After the processing of the options, initialize the master deep net
1196 size_t batchSize = settings.batchSize;
1197 this->SetBatchSize(batchSize);
1198 // Should be replaced by actual implementation. No support for this now.
1199 size_t inputDepth = this->GetInputDepth();
1200 size_t inputHeight = this->GetInputHeight();
1201 size_t inputWidth = this->GetInputWidth();
1202 size_t batchDepth = this->GetBatchDepth();
1203 size_t batchHeight = this->GetBatchHeight();
1204 size_t batchWidth = this->GetBatchWidth();
1205 ELossFunction J = this->GetLossFunction();
1207 ERegularization R = settings.regularization;
1208 EOptimizer O = settings.optimizer;
1209 Scalar_t weightDecay = settings.weightDecay;
1210
1211 //Batch size should be included in batch layout as well. There are two possibilities:
1212 // 1. Batch depth = batch size one will input tensorsa as (batch_size x d1 x d2)
1213 // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1214 // 2. Batch depth = 1, batch height = batch size batxch width = dim of input features
1215 // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1216
1217 if (batchDepth != batchSize && batchDepth > 1) {
1218 Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1219 return;
1220 }
1221 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1222 Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1223 return;
1224 }
1225
1226
1227 //check also that input layout compatible with batch layout
1228 bool badLayout = false;
1229 // case batch depth == batch size
1230 if (batchDepth == batchSize)
1231 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1232 // case batch Height is batch size
1233 if (batchHeight == batchSize && batchDepth == 1)
1234 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1235 if (badLayout) {
1236 Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1237 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1238 return;
1239 }
1240
1241 // check batch size is compatible with number of events
1242 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1243 Log() << kFATAL << "Number of samples in the datasets are train: ("
1244 << nTrainingSamples << ") test: (" << nValidationSamples
1245 << "). One of these is smaller than the batch size of "
1246 << settings.batchSize << ". Please increase the batch"
1247 << " size to be at least the same size as the smallest"
1248 << " of them." << Endl;
1249 }
1250
1251 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1252
1253 // create a copy of DeepNet for evaluating but with batch size = 1
1254 // fNet is the saved network and will be with CPU or Referrence architecture
1255 if (trainingPhase == 1) {
1256 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1257 batchHeight, batchWidth, J, I, R, weightDecay));
1258 fBuildNet = true;
1259 }
1260 else
1261 fBuildNet = false;
1262
1263 // Initialize the vector of slave nets
1264 std::vector<DeepNet_t> nets{};
1265 nets.reserve(nThreads);
1266 for (size_t i = 0; i < nThreads; i++) {
1267 // create a copies of the master deep net
1268 nets.push_back(deepNet);
1269 }
1270
1271
1272 // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1273 CreateDeepNet(deepNet, nets);
1274
1275
1276 // set droput probabilities
1277 // use convention to store in the layer 1.- dropout probabilities
1278 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1279 for (auto & p : dropoutVector) {
1280 p = 1.0 - p;
1281 }
1282 deepNet.SetDropoutProbabilities(dropoutVector);
1283
1284 if (trainingPhase > 1) {
1285 // copy initial weights from fNet to deepnet
1286 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1287 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1288 }
1289 }
1290
1291 // when fNet is built create also input matrix that will be used to evaluate it
1292 if (fBuildNet) {
1293 //int n1 = batchHeight;
1294 //int n2 = batchWidth;
1295 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1296 //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1297 //fXInput = TensorImpl_t(1,n1,n2);
1299 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1300 fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1301 fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );
1302
1303
1304 // create pointer to output matrix used for the predictions
1305 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1306
1307 // print the created network
1308 Log() << "***** Deep Learning Network *****" << Endl;
1309 if (Log().GetMinType() <= kINFO)
1310 deepNet.Print();
1311 }
1312 Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1313
1314 // Loading the training and validation datasets
1315 TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1316 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1317 {inputDepth, inputHeight, inputWidth},
1318 {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1319 deepNet.GetOutputWidth(), nThreads);
1320
1321 TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1322 TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1323 {inputDepth, inputHeight, inputWidth},
1324 { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1325 deepNet.GetOutputWidth(), nThreads);
1326
1327
1328
1329 // do an evaluation of the network to compute initial minimum test error
1330
1331 Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1332
1333 Double_t minValError = 0.0;
1334 Log() << "Compute initial loss on the validation data " << Endl;
1335 for (auto batch : validationData) {
1336 auto inputTensor = batch.GetInput();
1337 auto outputMatrix = batch.GetOutput();
1338 auto weights = batch.GetWeights();
1339
1340 //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1341 // should we apply droput to the loss ??
1342 minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1343 }
1344 // add Regularization term
1345 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1346 minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1347 minValError += regzTerm;
1348
1349
1350 // create a pointer to base class VOptimizer
1351 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1352
1353 // initialize the base class pointer with the corresponding derived class object.
1354 switch (O) {
1355
1356 case EOptimizer::kSGD:
1357 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1358 new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1359 break;
1360
1361 case EOptimizer::kAdam: {
1362 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1364 deepNet, settings.learningRate, settings.optimizerParams["ADAM_beta1"],
1365 settings.optimizerParams["ADAM_beta2"], settings.optimizerParams["ADAM_eps"]));
1366 break;
1367 }
1368
1369 case EOptimizer::kAdagrad:
1370 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1371 new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1372 settings.optimizerParams["ADAGRAD_eps"]));
1373 break;
1374
1375 case EOptimizer::kRMSProp:
1376 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1377 new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum,
1378 settings.optimizerParams["RMSPROP_rho"],
1379 settings.optimizerParams["RMSPROP_eps"]));
1380 break;
1381
1382 case EOptimizer::kAdadelta:
1383 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1384 new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1385 settings.optimizerParams["ADADELTA_rho"],
1386 settings.optimizerParams["ADADELTA_eps"]));
1387 break;
1388 }
1389
1390
1391 // Initialize the vector of batches, one batch for one slave network
1392 std::vector<TTensorBatch<Architecture_t>> batches{};
1393
1394 bool converged = false;
1395 size_t convergenceCount = 0;
1396 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1397
1398 // start measuring
1399 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1400 tstart = std::chrono::system_clock::now();
1401
1402 // function building string with optimizer parameters values for logging
1403 auto optimParametersString = [&]() {
1404 TString optimParameters;
1405 for ( auto & element : settings.optimizerParams) {
1406 TString key = element.first;
1407 key.ReplaceAll(settings.optimizerName + "_", ""); // strip optimizerName_
1408 double value = element.second;
1409 if (!optimParameters.IsNull())
1410 optimParameters += ",";
1411 else
1412 optimParameters += " (";
1413 optimParameters += TString::Format("%s=%g", key.Data(), value);
1414 }
1415 if (!optimParameters.IsNull())
1416 optimParameters += ")";
1417 return optimParameters;
1418 };
1419
1420 Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1421 << " Optimizer " << settings.optimizerName
1422 << optimParametersString()
1423 << " Learning rate = " << settings.learningRate << " regularization " << (char)settings.regularization
1424 << " minimum error = " << minValError << Endl;
1425 if (!fInteractive) {
1426 std::string separator(62, '-');
1427 Log() << separator << Endl;
1428 Log() << std::setw(10) << "Epoch"
1429 << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err." << std::setw(12)
1430 << "t(s)/epoch" << std::setw(12) << "t(s)/Loss" << std::setw(12) << "nEvents/s" << std::setw(12)
1431 << "Conv. Steps" << Endl;
1432 Log() << separator << Endl;
1433 }
1434
1435 // set up generator for shuffling the batches
1436 // if seed is zero we have always a different order in the batches
1437 size_t shuffleSeed = 0;
1438 if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1439 RandomGenerator<TRandom3> rng(shuffleSeed);
1440
1441 // print weights before
1442 if (fBuildNet && debug) {
1443 Log() << "Initial Deep Net Weights " << Endl;
1444 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1445 for (size_t l = 0; l < weights_tensor.size(); ++l)
1446 weights_tensor[l].Print();
1447 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1448 bias_tensor[0].Print();
1449 }
1450
1451 Log() << " Start epoch iteration ..." << Endl;
1452 bool debugFirstEpoch = false;
1453 bool computeLossInTraining = true; // compute loss in training or at test time
1454 size_t nTrainEpochs = 0;
1455 while (!converged) {
1456 nTrainEpochs++;
1457 trainingData.Shuffle(rng);
1458
1459 // execute all epochs
1460 //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1461
1462 Double_t trainingError = 0;
1463 for (size_t i = 0; i < batchesInEpoch; ++i ) {
1464 // Clean and load new batches, one batch for one slave net
1465 //batches.clear();
1466 //batches.reserve(nThreads);
1467 //for (size_t j = 0; j < nThreads; j++) {
1468 // batches.push_back(trainingData.GetTensorBatch());
1469 //}
1470 if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1471
1472 auto my_batch = trainingData.GetTensorBatch();
1473
1474 if (debugFirstEpoch)
1475 std::cout << "got batch data - doing forward \n";
1476
1477#ifdef DEBUG
1478
1479 Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1480 typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1481 typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1482 Architecture_t::PrintTensor(tOut,"label tensor",true) ;
1483 Architecture_t::PrintTensor(tW,"weight tensor",true) ;
1484#endif
1485
1486 deepNet.Forward(my_batch.GetInput(), true);
1487 // compute also loss
1488 if (computeLossInTraining) {
1489 auto outputMatrix = my_batch.GetOutput();
1490 auto weights = my_batch.GetWeights();
1491 trainingError += deepNet.Loss(outputMatrix, weights, false);
1492 }
1493
1494 if (debugFirstEpoch)
1495 std::cout << "- doing backward \n";
1496
1497#ifdef DEBUG
1498 size_t nlayers = deepNet.GetLayers().size();
1499 for (size_t l = 0; l < nlayers; ++l) {
1500 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1501 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1502 TString::Format("initial weights layer %d", l).Data());
1503
1504 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1505 TString::Format("output tensor layer %d", l).Data());
1506 }
1507#endif
1508
1509 //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1510
1511 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1512
1513 if (debugFirstEpoch)
1514 std::cout << "- doing optimizer update \n";
1515
1516 // increment optimizer step that is used in some algorithms (e.g. ADAM)
1517 optimizer->IncrementGlobalStep();
1518 optimizer->Step();
1519
1520#ifdef DEBUG
1521 std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1522 for (size_t l = 0; l < nlayers; ++l) {
1523 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1524 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1525 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1526 }
1527 }
1528#endif
1529
1530 }
1531
1532 if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
1533 //}
1534 debugFirstEpoch = false;
1535 if ((nTrainEpochs % settings.testInterval) == 0) {
1536
1537 std::chrono::time_point<std::chrono::system_clock> t1,t2;
1538
1539 t1 = std::chrono::system_clock::now();
1540
1541 // Compute validation error.
1542
1543
1544 Double_t valError = 0.0;
1545 bool inTraining = false;
1546 for (auto batch : validationData) {
1547 auto inputTensor = batch.GetInput();
1548 auto outputMatrix = batch.GetOutput();
1549 auto weights = batch.GetWeights();
1550 // should we apply droput to the loss ??
1551 valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1552 }
1553 // normalize loss to number of batches and add regularization term
1554 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1555 valError /= (Double_t)(nValidationSamples / settings.batchSize);
1556 valError += regTerm;
1557
1558 //Log the loss value
1559 fTrainHistory.AddValue("valError",nTrainEpochs,valError);
1560
1561 t2 = std::chrono::system_clock::now();
1562
1563 // checking for convergence
1564 if (valError < minValError) {
1565 convergenceCount = 0;
1566 } else {
1567 convergenceCount += settings.testInterval;
1568 }
1569
1570 // copy configuration when reached a minimum error
1571 if (valError < minValError ) {
1572 // Copy weights from deepNet to fNet
1573 Log() << std::setw(10) << nTrainEpochs
1574 << " Minimum Test error found - save the configuration " << Endl;
1575 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1576 fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1577 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1578 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1579 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1580 // }
1581 }
1582 // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1583 // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1584
1585 minValError = valError;
1586 }
1587 else if ( minValError <= 0. )
1588 minValError = valError;
1589
1590 if (!computeLossInTraining) {
1591 trainingError = 0.0;
1592 // Compute training error.
1593 for (auto batch : trainingData) {
1594 auto inputTensor = batch.GetInput();
1595 auto outputMatrix = batch.GetOutput();
1596 auto weights = batch.GetWeights();
1597 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1598 }
1599 }
1600 // normalize loss to number of batches and add regularization term
1601 trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1602 trainingError += regTerm;
1603
1604 //Log the loss value
1605 fTrainHistory.AddValue("trainingError",nTrainEpochs,trainingError);
1606
1607 // stop measuring
1608 tend = std::chrono::system_clock::now();
1609
1610 // Compute numerical throughput.
1611 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1612 std::chrono::duration<double> elapsed1 = t1-tstart;
1613 // std::chrono::duration<double> elapsed2 = t2-tstart;
1614 // time to compute training and test errors
1615 std::chrono::duration<double> elapsed_testing = tend-t1;
1616
1617 double seconds = elapsed_seconds.count();
1618 // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1619 // nGFlops *= deepnet.GetNFlops() * 1e-9;
1620 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1621
1622 converged =
1623 convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1624
1625
1626 Log() << std::setw(10) << nTrainEpochs << " | "
1627 << std::setw(12) << trainingError
1628 << std::setw(12) << valError
1629 << std::setw(12) << seconds / settings.testInterval
1630 << std::setw(12) << elapsed_testing.count()
1631 << std::setw(12) << 1. / eventTime
1632 << std::setw(12) << convergenceCount
1633 << Endl;
1634
1635 if (converged) {
1636 Log() << Endl;
1637 }
1638 tstart = std::chrono::system_clock::now();
1639 }
1640
1641 // if (stepCount % 10 == 0 || converged) {
1642 if (converged && debug) {
1643 Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
1644 << Endl;
1645 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1646 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1647 for (size_t l = 0; l < weights_tensor.size(); ++l)
1648 weights_tensor[l].Print();
1649 bias_tensor[0].Print();
1650 }
1651
1652 }
1653
1654 trainingPhase++;
1655 } // end loop on training Phase
1656}
1657
1658////////////////////////////////////////////////////////////////////////////////
1660{
1661 if (fInteractive) {
1662 Log() << kFATAL << "Not implemented yet" << Endl;
1663 return;
1664 }
1665
1666 // using for training same scalar type defined for the prediction
1667 if (this->GetArchitectureString() == "GPU") {
1668#ifdef R__HAS_TMVAGPU
1669 Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1670#ifdef R__HAS_CUDNN
1671 TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1672#else
1673 TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1674#endif
1675#else
1676 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1677 "you have CUDA installed and it was successfully "
1678 "detected by CMAKE."
1679 << Endl;
1680 return;
1681#endif
1682 } else if (this->GetArchitectureString() == "CPU") {
1683#ifdef R__HAS_TMVACPU
1684 // note that number of threads used for BLAS might be different
1685 // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1686 Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
1687 << gConfig().GetNCpu() << Endl << Endl;
1688#else
1689 Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
1690 << Endl;
1691#endif
1692 TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1693 return;
1694 }
1695 else {
1696 Log() << kFATAL << this->GetArchitectureString() <<
1697 " is not a supported architecture for TMVA::MethodDL"
1698 << Endl;
1699 }
1700
1701}
1702
1703////////////////////////////////////////////////////////////////////////////////
1705{
1706 // fill the input tensor fXInput from the current Event data
1707 // with the correct shape depending on the model used
1708 // The input tensor is used for network prediction after training
1709 // using a single event. The network batch size must be equal to 1.
1710 // The architecture specified at compile time in ArchitectureImpl_t
1711 // is used. This should be the CPU architecture
1712
1713 if (!fNet || fNet->GetDepth() == 0) {
1714 Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
1715 }
1716 if (fNet->GetBatchSize() != 1) {
1717 Log() << kFATAL << "FillINputTensor::Network batch size must be equal to 1 when doing single event predicition" << Endl;
1718 }
1719
1720 // get current event
1721 const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1722 size_t nVariables = GetEvent()->GetNVariables();
1723
1724 // for Columnlayout tensor memory layout is HWC while for rowwise is CHW
1725 if (fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
1726 R__ASSERT(fXInput.GetShape().size() < 4);
1727 size_t nc, nhw = 0;
1728 if (fXInput.GetShape().size() == 2) {
1729 nc = fXInput.GetShape()[0];
1730 if (nc != 1) {
1731 ArchitectureImpl_t::PrintTensor(fXInput);
1732 Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1" << Endl;
1733 }
1734 nhw = fXInput.GetShape()[1];
1735 } else {
1736 nc = fXInput.GetCSize();
1737 nhw = fXInput.GetWSize();
1738 }
1739 if (nVariables != nc * nhw) {
1740 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1741 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw << Endl;
1742 }
1743 for (size_t j = 0; j < nc; j++) {
1744 for (size_t k = 0; k < nhw; k++) {
1745 // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
1746 fXInputBuffer[k * nc + j] = inputValues[j * nhw + k]; // for column layout !!!
1747 }
1748 }
1749 } else {
1750 // row-wise layout
1751 assert(fXInput.GetShape().size() >= 4);
1752 size_t nc = fXInput.GetCSize();
1753 size_t nh = fXInput.GetHSize();
1754 size_t nw = fXInput.GetWSize();
1755 size_t n = nc * nh * nw;
1756 if (nVariables != n) {
1757 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1758 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
1759 << Endl;
1760 }
1761 for (size_t j = 0; j < n; j++) {
1762 // in this case TMVA event has same order as input tensor
1763 fXInputBuffer[j] = inputValues[j]; // for column layout !!!
1764 }
1765 }
1766 // copy buffer in input
1767 fXInput.GetDeviceBuffer().CopyFrom(fXInputBuffer);
1768 return;
1769}
1770
1771////////////////////////////////////////////////////////////////////////////////
1772Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1773{
1774
1776
1777 // perform the prediction
1778 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1779
1780 // return value
1781 double mvaValue = (*fYHat)(0, 0);
1782
1783 // for debugging
1784#ifdef DEBUG_MVAVALUE
1785 using Tensor_t = std::vector<MatrixImpl_t>;
1786 TMatrixF xInput(n1,n2, inputValues.data() );
1787 std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1788 xInput.Print();
1789 std::cout << "Output of DeepNet " << mvaValue << std::endl;
1790 auto & deepnet = *fNet;
1791 std::cout << "Loop on layers " << std::endl;
1792 for (int l = 0; l < deepnet.GetDepth(); ++l) {
1793 std::cout << "Layer " << l;
1794 const auto * layer = deepnet.GetLayerAt(l);
1795 const Tensor_t & layer_output = layer->GetOutput();
1796 layer->Print();
1797 std::cout << "DNN output " << layer_output.size() << std::endl;
1798 for (size_t i = 0; i < layer_output.size(); ++i) {
1799#ifdef R__HAS_TMVAGPU
1800 //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1801 TMatrixD m = layer_output[i];
1802#else
1803 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1804#endif
1805 m.Print();
1806 }
1807 const Tensor_t & layer_weights = layer->GetWeights();
1808 std::cout << "DNN weights " << layer_weights.size() << std::endl;
1809 if (layer_weights.size() > 0) {
1810 int i = 0;
1811#ifdef R__HAS_TMVAGPU
1812 TMatrixD m = layer_weights[i];
1813// TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1814#else
1815 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1816#endif
1817 m.Print();
1818 }
1819 }
1820#endif
1821
1822 return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1823}
1824////////////////////////////////////////////////////////////////////////////////
1825/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1826////////////////////////////////////////////////////////////////////////////////
1827template <typename Architecture_t>
1828std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1829{
1830
1831 // Check whether the model is setup
1832 if (!fNet || fNet->GetDepth() == 0) {
1833 Log() << kFATAL << "The network has not been trained and fNet is not built"
1834 << Endl;
1835 }
1836
1837 // rebuild the networks
1838 this->SetBatchSize(batchSize);
1839 size_t inputDepth = this->GetInputDepth();
1840 size_t inputHeight = this->GetInputHeight();
1841 size_t inputWidth = this->GetInputWidth();
1842 size_t batchDepth = this->GetBatchDepth();
1843 size_t batchHeight = this->GetBatchHeight();
1844 size_t batchWidth = this->GetBatchWidth();
1845 ELossFunction J = fNet->GetLossFunction();
1846 EInitialization I = fNet->GetInitialization();
1847 ERegularization R = fNet->GetRegularization();
1848 Double_t weightDecay = fNet->GetWeightDecay();
1849
1850 using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1851 using Matrix_t = typename Architecture_t::Matrix_t;
1852 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1853
1854 // create the deep neural network
1855 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1856 std::vector<DeepNet_t> nets{};
1857 fBuildNet = false;
1858 CreateDeepNet(deepNet,nets);
1859
1860 // copy weights from the saved fNet to the built DeepNet
1861 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1862 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1863 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1864 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
1865 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
1866 // }
1867 }
1868
1869 size_t n1 = deepNet.GetBatchHeight();
1870 size_t n2 = deepNet.GetBatchWidth();
1871 size_t n0 = deepNet.GetBatchSize();
1872 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1873 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1874 n1 = deepNet.GetBatchSize();
1875 n0 = 1;
1876 }
1877 //this->SetBatchDepth(n0);
1878 Long64_t nEvents = lastEvt - firstEvt;
1879 TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1880 TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1881
1882
1883 // Tensor_t xInput;
1884 // for (size_t i = 0; i < n0; ++i)
1885 // xInput.emplace_back(Matrix_t(n1,n2));
1886
1887 // create pointer to output matrix used for the predictions
1888 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1889
1890 // use timer
1891 Timer timer( nEvents, GetName(), kTRUE );
1892
1893 if (logProgress)
1894 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1895 << "Evaluation of " << GetMethodName() << " on "
1896 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1897 << " sample (" << nEvents << " events)" << Endl;
1898
1899
1900 // eventg loop
1901 std::vector<double> mvaValues(nEvents);
1902
1903
1904 for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1905
1906 Long64_t ievt_end = ievt + batchSize;
1907 // case of batch prediction for
1908 if (ievt_end <= lastEvt) {
1909
1910 if (ievt == firstEvt) {
1911 Data()->SetCurrentEvent(ievt);
1912 size_t nVariables = GetEvent()->GetNVariables();
1913
1914 if (n1 == batchSize && n0 == 1) {
1915 if (n2 != nVariables) {
1916 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1917 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1918 << Endl;
1919 }
1920 } else {
1921 if (n1*n2 != nVariables || n0 != batchSize) {
1922 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1923 << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1924 << Endl;
1925 }
1926 }
1927 }
1928
1929 auto batch = testData.GetTensorBatch();
1930 auto inputTensor = batch.GetInput();
1931
1932 auto xInput = batch.GetInput();
1933 // make the prediction
1934 deepNet.Prediction(yHat, xInput, fOutputFunction);
1935 for (size_t i = 0; i < batchSize; ++i) {
1936 double value = yHat(i,0);
1937 mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1938 }
1939 }
1940 else {
1941 // case of remaining events: compute prediction by single event !
1942 for (Long64_t i = ievt; i < lastEvt; ++i) {
1943 Data()->SetCurrentEvent(i);
1944 mvaValues[i] = GetMvaValue();
1945 }
1946 }
1947 }
1948
1949 if (logProgress) {
1950 Log() << kINFO
1951 << "Elapsed time for evaluation of " << nEvents << " events: "
1952 << timer.GetElapsedTime() << " " << Endl;
1953 }
1954
1955 return mvaValues;
1956}
1957
1958//////////////////////////////////////////////////////////////////////////
1959/// Get the regression output values for a single event
1960//////////////////////////////////////////////////////////////////////////
1961const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1962{
1963
1964 FillInputTensor ();
1965
1966 // perform the network prediction
1967 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1968
1969 size_t nTargets = DataInfo().GetNTargets();
1970 R__ASSERT(nTargets == fYHat->GetNcols());
1971
1972 std::vector<Float_t> output(nTargets);
1973 for (size_t i = 0; i < nTargets; i++)
1974 output[i] = (*fYHat)(0, i);
1975
1976 // ned to transform back output values
1977 if (fRegressionReturnVal == NULL)
1978 fRegressionReturnVal = new std::vector<Float_t>(nTargets);
1979 R__ASSERT(fRegressionReturnVal->size() == nTargets);
1980
1981 // N.B. one should cache here temporary event class
1982 Event *evT = new Event(*GetEvent());
1983 for (size_t i = 0; i < nTargets; ++i) {
1984 evT->SetTarget(i, output[i]);
1985 }
1986 const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
1987 for (size_t i = 0; i < nTargets; ++i) {
1988 (*fRegressionReturnVal)[i] = evT2->GetTarget(i);
1989 }
1990 delete evT;
1991 return *fRegressionReturnVal;
1992}
1993//////////////////////////////////////////////////////////////////////////
1994/// Get the multi-class output values for a single event
1995//////////////////////////////////////////////////////////////////////////
1996const std::vector<Float_t> &TMVA::MethodDL::GetMulticlassValues()
1997{
1998
1999 FillInputTensor();
2000
2001 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
2002
2003 size_t nClasses = DataInfo().GetNClasses();
2004 R__ASSERT(nClasses == fYHat->GetNcols());
2005
2006 if (fMulticlassReturnVal == NULL) {
2007 fMulticlassReturnVal = new std::vector<Float_t>(nClasses);
2008 }
2009 R__ASSERT(fMulticlassReturnVal->size() == nClasses);
2010
2011 for (size_t i = 0; i < nClasses; i++) {
2012 (*fMulticlassReturnVal)[i] = (*fYHat)(0, i);
2013 }
2014 return *fMulticlassReturnVal;
2015}
2016
2017////////////////////////////////////////////////////////////////////////////////
2018/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
2019/// Here we will evaluate using a default batch size and the same architecture used for
2020/// Training
2021////////////////////////////////////////////////////////////////////////////////
2022std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
2023{
2024
2025 Long64_t nEvents = Data()->GetNEvents();
2026 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2027 if (firstEvt < 0) firstEvt = 0;
2028 nEvents = lastEvt-firstEvt;
2029
2030 // use same batch size as for training (from first strategy)
2031 size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
2032 size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
2033 if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
2034
2035 // using for training same scalar type defined for the prediction
2036 if (this->GetArchitectureString() == "GPU") {
2037#ifdef R__HAS_TMVAGPU
2038 Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
2039#ifdef R__HAS_CUDNN
2040 return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2041#else
2042 return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2043#endif
2044
2045#endif
2046 }
2047 Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
2048 return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
2049}
2050////////////////////////////////////////////////////////////////////////////////
2051void MethodDL::AddWeightsXMLTo(void * parent) const
2052{
2053 // Create the parent XML node with name "Weights"
2054 auto & xmlEngine = gTools().xmlengine();
2055 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2056
2057 /*! Get all necessary information, in order to be able to reconstruct the net
2058 * if we read the same XML file. */
2059
2060 // Deep Net specific info
2061 Int_t depth = fNet->GetDepth();
2062
2063 Int_t inputDepth = fNet->GetInputDepth();
2064 Int_t inputHeight = fNet->GetInputHeight();
2065 Int_t inputWidth = fNet->GetInputWidth();
2066
2067 Int_t batchSize = fNet->GetBatchSize();
2068
2069 Int_t batchDepth = fNet->GetBatchDepth();
2070 Int_t batchHeight = fNet->GetBatchHeight();
2071 Int_t batchWidth = fNet->GetBatchWidth();
2072
2073 char lossFunction = static_cast<char>(fNet->GetLossFunction());
2074 char initialization = static_cast<char>(fNet->GetInitialization());
2075 char regularization = static_cast<char>(fNet->GetRegularization());
2076
2077 Double_t weightDecay = fNet->GetWeightDecay();
2078
2079 // Method specific info (not sure these are needed)
2080 char outputFunction = static_cast<char>(this->GetOutputFunction());
2081 //char lossFunction = static_cast<char>(this->GetLossFunction());
2082
2083 // Add attributes to the parent node
2084 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2085
2086 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2087 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2088 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2089
2090 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2091 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2092 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2093 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2094
2095 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2096 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2097 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2098 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2099
2100 gTools().AddAttr(nn, "WeightDecay", weightDecay);
2101
2102
2103 for (Int_t i = 0; i < depth; i++)
2104 {
2105 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2106 }
2107
2108
2109}
2110
2111////////////////////////////////////////////////////////////////////////////////
2113{
2114
2115 auto netXML = gTools().GetChild(rootXML, "Weights");
2116 if (!netXML){
2117 netXML = rootXML;
2118 }
2119
2120 size_t netDepth;
2121 gTools().ReadAttr(netXML, "NetDepth", netDepth);
2122
2123 size_t inputDepth, inputHeight, inputWidth;
2124 gTools().ReadAttr(netXML, "InputDepth", inputDepth);
2125 gTools().ReadAttr(netXML, "InputHeight", inputHeight);
2126 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
2127
2128 size_t batchSize, batchDepth, batchHeight, batchWidth;
2129 gTools().ReadAttr(netXML, "BatchSize", batchSize);
2130 // use always batchsize = 1
2131 //batchSize = 1;
2132 gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
2133 gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
2134 gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
2135
2136 char lossFunctionChar;
2137 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
2138 char initializationChar;
2139 gTools().ReadAttr(netXML, "Initialization", initializationChar);
2140 char regularizationChar;
2141 gTools().ReadAttr(netXML, "Regularization", regularizationChar);
2142 char outputFunctionChar;
2143 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
2144 double weightDecay;
2145 gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
2146
2147 // create the net
2148
2149 // DeepNetCpu_t is defined in MethodDL.h
2150 this->SetInputDepth(inputDepth);
2151 this->SetInputHeight(inputHeight);
2152 this->SetInputWidth(inputWidth);
2153 this->SetBatchDepth(batchDepth);
2154 this->SetBatchHeight(batchHeight);
2155 this->SetBatchWidth(batchWidth);
2156
2157
2158
2159 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2160 batchHeight, batchWidth,
2161 static_cast<ELossFunction>(lossFunctionChar),
2162 static_cast<EInitialization>(initializationChar),
2163 static_cast<ERegularization>(regularizationChar),
2164 weightDecay));
2165
2166 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
2167
2168
2169 //size_t previousWidth = inputWidth;
2170 auto layerXML = gTools().xmlengine().GetChild(netXML);
2171
2172 // loop on the layer and add them to the network
2173 for (size_t i = 0; i < netDepth; i++) {
2174
2175 TString layerName = gTools().xmlengine().GetNodeName(layerXML);
2176
2177 // case of dense layer
2178 if (layerName == "DenseLayer") {
2179
2180 // read width and activation function and then we can create the layer
2181 size_t width = 0;
2182 gTools().ReadAttr(layerXML, "Width", width);
2183
2184 // Read activation function.
2185 TString funcString;
2186 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2187 EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
2188
2189
2190 fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
2191
2192 }
2193 // Convolutional Layer
2194 else if (layerName == "ConvLayer") {
2195
2196 // read width and activation function and then we can create the layer
2197 size_t depth = 0;
2198 gTools().ReadAttr(layerXML, "Depth", depth);
2199 size_t fltHeight, fltWidth = 0;
2200 size_t strideRows, strideCols = 0;
2201 size_t padHeight, padWidth = 0;
2202 gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
2203 gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
2204 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2205 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2206 gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
2207 gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
2208
2209 // Read activation function.
2210 TString funcString;
2211 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2212 EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
2213
2214
2215 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2216 padHeight, padWidth, actFunction);
2217
2218 }
2219
2220 // MaxPool Layer
2221 else if (layerName == "MaxPoolLayer") {
2222
2223 // read maxpool layer info
2224 size_t filterHeight, filterWidth = 0;
2225 size_t strideRows, strideCols = 0;
2226 gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
2227 gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2228 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2229 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2230
2231 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2232 }
2233 // Reshape Layer
2234 else if (layerName == "ReshapeLayer") {
2235
2236 // read reshape layer info
2237 size_t depth, height, width = 0;
2238 gTools().ReadAttr(layerXML, "Depth", depth);
2239 gTools().ReadAttr(layerXML, "Height", height);
2240 gTools().ReadAttr(layerXML, "Width", width);
2241 int flattening = 0;
2242 gTools().ReadAttr(layerXML, "Flattening",flattening );
2243
2244 fNet->AddReshapeLayer(depth, height, width, flattening);
2245
2246 }
2247 // RNN Layer
2248 else if (layerName == "RNNLayer") {
2249
2250 // read RNN layer info
2251 size_t stateSize,inputSize, timeSteps = 0;
2252 int rememberState= 0;
2253 int returnSequence = 0;
2254 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2255 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2256 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2257 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2258 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2259
2260 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2261
2262 }
2263 // LSTM Layer
2264 else if (layerName == "LSTMLayer") {
2265
2266 // read RNN layer info
2267 size_t stateSize,inputSize, timeSteps = 0;
2268 int rememberState, returnSequence = 0;
2269 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2270 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2271 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2272 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2273 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2274
2275 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2276
2277 }
2278 // GRU Layer
2279 else if (layerName == "GRULayer") {
2280
2281 // read RNN layer info
2282 size_t stateSize,inputSize, timeSteps = 0;
2283 int rememberState, returnSequence, resetGateAfter = 0;
2284 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2285 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2286 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2287 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2288 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2289 gTools().ReadAttr(layerXML, "ResetGateAfter", resetGateAfter);
2290
2291 if (!resetGateAfter && ArchitectureImpl_t::IsCudnn())
2292 Warning("ReadWeightsFromXML",
2293 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2294
2295 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2296 }
2297 // BatchNorm Layer
2298 else if (layerName == "BatchNormLayer") {
2299 // use some dammy value which will be overwrittem in BatchNormLayer::ReadWeightsFromXML
2300 fNet->AddBatchNormLayer(0., 0.0);
2301 }
2302 // read weights and biases
2303 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2304
2305 // read next layer
2306 layerXML = gTools().GetNextChild(layerXML);
2307 }
2308
2309 fBuildNet = false;
2310 // create now the input and output matrices
2311 //int n1 = batchHeight;
2312 //int n2 = batchWidth;
2313 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2314 //if (fXInput.size() > 0) fXInput.clear();
2315 //fXInput.emplace_back(MatrixImpl_t(n1,n2));
2317 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
2318 // make here a ColumnMajor tensor
2319 fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth(),TMVA::Experimental::MemoryLayout::ColumnMajor );
2321
2322 // create pointer to output matrix used for the predictions
2323 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2324
2325
2326}
2327
2328
2329////////////////////////////////////////////////////////////////////////////////
2330void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
2331{
2332}
2333
2334////////////////////////////////////////////////////////////////////////////////
2336{
2337 // TODO
2338 return NULL;
2339}
2340
2341////////////////////////////////////////////////////////////////////////////////
2343{
2344 // TODO
2345}
2346
2347} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define e(i)
Definition RSha256.hxx:103
unsigned int UInt_t
Definition RtypesCore.h:46
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
double Double_t
Definition RtypesCore.h:59
long long Long64_t
Definition RtypesCore.h:69
constexpr Bool_t kTRUE
Definition RtypesCore.h:93
#define ClassImp(name)
Definition Rtypes.h:377
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t width
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
The Formula class.
Definition TFormula.h:89
Double_t Eval(Args... args) const
Set first 1, 2, 3 or 4 variables (e.g.
Definition TFormula.h:324
void Print(Option_t *option="") const override
Dump this line with its attributes.
Definition TLine.cxx:419
UInt_t GetNCpu()
Definition Config.h:70
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
MsgLogger & Log() const
Adadelta Optimizer class.
Definition Adadelta.h:45
Adagrad Optimizer class.
Definition Adagrad.h:45
Adam Optimizer class.
Definition Adam.h:45
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition Cpu.h:108
static bool IsCudnn()
Definition Cpu.h:131
Generic Deep Neural Network class.
Definition DeepNet.h:73
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition DeepNet.h:825
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition DeepNet.h:608
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition DeepNet.h:740
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition DeepNet.h:567
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition DeepNet.h:485
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition DeepNet.h:439
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition DeepNet.h:773
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition DeepNet.h:524
Generic layer class.
Definition DenseLayer.h:59
RMSProp Optimizer class.
Definition RMSProp.h:45
Stochastic Batch Gradient Descent Optimizer class.
Definition SGD.h:46
Generic General Layer class.
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Class that contains all the data information.
Definition DataSetInfo.h:62
UInt_t GetNClasses() const
Types::ETreeType GetCurrentType() const
Definition DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:206
void SetCurrentEvent(Long64_t ievt) const
Definition DataSet.h:88
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition Event.cxx:367
UInt_t GetNVariables() const
accessor to the number of variables
Definition Event.cxx:316
UInt_t GetClass() const
Definition Event.h:86
Virtual base Class for all MVA method.
Definition MethodBase.h:111
const char * GetName() const
Definition MethodBase.h:334
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition MethodBase.h:686
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
Definition MethodBase.h:346
const TString & GetMethodName() const
Definition MethodBase.h:331
const Event * GetEvent() const
Definition MethodBase.h:751
DataSetInfo & DataInfo() const
Definition MethodBase.h:410
UInt_t GetNVariables() const
Definition MethodBase.h:345
Types::EAnalysisType fAnalysisType
Definition MethodBase.h:595
UInt_t GetNvar() const
Definition MethodBase.h:344
TrainingHistory fTrainHistory
Definition MethodBase.h:425
DataSet * Data() const
Definition MethodBase.h:409
IPythonInteractive * fInteractive
temporary dataset used when evaluating on a different data (used by MethodCategory::GetMvaValues)
Definition MethodBase.h:448
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition MethodDL.h:108
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition MethodDL.h:183
void GetHelpMessage() const
DNN::ELossFunction fLossFunction
The loss function.
Definition MethodDL.h:190
std::vector< size_t > fInputShape
Contains the batch size (no.
Definition MethodDL.h:178
TString fLayoutString
The string defining the layout of the deep net.
Definition MethodDL.h:194
void SetInputDepth(int inputDepth)
Setters.
Definition MethodDL.h:286
std::unique_ptr< MatrixImpl_t > fYHat
Definition MethodDL.h:208
void Train()
Methods for training the deep learning network.
size_t GetBatchHeight() const
Definition MethodDL.h:263
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class Here we will evaluate...
TString fWeightInitializationString
The string defining the weight initialization method.
Definition MethodDL.h:197
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate max pool layer.
Definition MethodDL.cxx:768
TensorImpl_t fXInput
Definition MethodDL.h:206
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition MethodDL.h:186
virtual const std::vector< Float_t > & GetMulticlassValues()
TString fArchitectureString
The string defining the architecture: CPU or GPU.
Definition MethodDL.h:198
void Init()
default initializations
Definition MethodDL.cxx:432
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
void TrainDeepNet()
train of deep neural network using the defined architecture
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition MethodDL.h:280
DNN::EOutputFunction GetOutputFunction() const
Definition MethodDL.h:269
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
Definition MethodDL.cxx:583
UInt_t GetNumValidationSamples()
Parse the validation string and return the number of events used for validation.
TString GetBatchLayoutString() const
Definition MethodDL.h:273
void SetInputWidth(int inputWidth)
Definition MethodDL.h:288
void ProcessOptions()
Definition MethodDL.cxx:219
HostBufferImpl_t fXInputBuffer
Definition MethodDL.h:207
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition MethodDL.h:184
size_t GetInputDepth() const
Definition MethodDL.h:255
std::unique_ptr< DeepNetImpl_t > fNet
Definition MethodDL.h:209
TString GetInputLayoutString() const
Definition MethodDL.h:272
void SetBatchHeight(size_t batchHeight)
Definition MethodDL.h:293
size_t GetInputHeight() const
Definition MethodDL.h:256
TString GetArchitectureString() const
Definition MethodDL.h:278
void ParseBatchLayout()
Parse the batch layout.
Definition MethodDL.cxx:482
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate batch normalization layer.
Definition MethodDL.cxx:890
void ReadWeightsFromStream(std::istream &)
void ReadWeightsFromXML(void *wghtnode)
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition MethodDL.h:199
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition MethodDL.h:93
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition MethodDL.h:189
DNN::EInitialization fWeightInitialization
The initialization method.
Definition MethodDL.h:188
size_t GetBatchDepth() const
Definition MethodDL.h:262
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate recurrent layer.
Definition MethodDL.cxx:931
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition MethodDL.h:204
size_t GetInputWidth() const
Definition MethodDL.h:257
void SetInputShape(std::vector< size_t > inputShape)
Definition MethodDL.h:289
DNN::ELossFunction GetLossFunction() const
Definition MethodDL.h:270
TString fBatchLayoutString
The string defining the layout of the batch.
Definition MethodDL.h:193
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
Definition MethodDL.cxx:669
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
Definition MethodDL.cxx:829
virtual const std::vector< Float_t > & GetRegressionValues()
TString fTrainingStrategyString
The string defining the training strategy.
Definition MethodDL.h:196
const Ranking * CreateRanking()
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition MethodDL.h:110
void SetBatchDepth(size_t batchDepth)
Definition MethodDL.h:292
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
void SetBatchWidth(size_t batchWidth)
Definition MethodDL.h:294
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
DNN::EInitialization GetWeightInitialization() const
Definition MethodDL.h:268
void SetBatchSize(size_t batchSize)
Definition MethodDL.h:291
TString GetLayoutString() const
Definition MethodDL.h:274
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition MethodDL.h:182
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition MethodDL.h:106
size_t GetBatchWidth() const
Definition MethodDL.h:264
void AddWeightsXMLTo(void *parent) const
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition MethodDL.h:107
virtual ~MethodDL()
Virtual Destructor.
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
void ParseInputLayout()
Parse the input layout.
Definition MethodDL.cxx:439
void FillInputTensor()
Get the input event tensor for evaluation. Internal function to fill the fXInput tensor with the corre...
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition MethodDL.h:201
void SetInputHeight(int inputHeight)
Definition MethodDL.h:287
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options,...
Definition MethodDL.cxx:529
TString fErrorStrategy
The string defining the error strategy for training.
Definition MethodDL.h:195
void DeclareOptions()
The option handling methods.
Definition MethodDL.cxx:167
TString fInputLayoutString
The string defining the layout of the input.
Definition MethodDL.h:192
EMsgType GetMinType() const
Definition MsgLogger.h:69
Ranking for variables in method (implementation)
Definition Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition Timer.cxx:146
TXMLEngine & xmlengine()
Definition Tools.h:262
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1150
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
TString StringFromInt(Long_t i)
string tools
Definition Tools.cxx:1223
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1162
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kMulticlass
Definition Types.h:129
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
@ kFATAL
Definition Types.h:61
void Print(Option_t *option="") const override
Dump this marker with its attributes.
Definition TMarker.cxx:339
void Print(Option_t *name="") const override
Print the matrix as a table of elements.
TMatrixT.
Definition TMatrixT.h:40
void Print(Option_t *option="") const override
Print TNamed name and title.
Definition TNamed.cxx:128
An array of TObjects.
Definition TObjArray.h:31
Collectable string class.
Definition TObjString.h:28
const TString & GetString() const
Definition TObjString.h:46
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition TObject.cxx:973
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition TObject.cxx:987
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition TObject.cxx:636
Basic string class.
Definition TString.h:139
Ssiz_t Length() const
Definition TString.h:417
Int_t Atoi() const
Return integer value of string.
Definition TString.cxx:1988
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition TString.cxx:1163
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition TString.cxx:1858
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition TString.cxx:538
const char * Data() const
Definition TString.h:376
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition TString.h:704
@ kTrailing
Definition TString.h:276
@ kBoth
Definition TString.h:276
void ToUpper()
Change string to upper case.
Definition TString.cxx:1195
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition TString.cxx:2264
Bool_t IsNull() const
Definition TString.h:414
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2378
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
const Int_t n
Definition legend1.C:16
#define I(x, y, z)
EOptimizer
Enum representing the optimizer used for training.
Definition Functions.h:82
EOutputFunction
Enum that represents output functions.
Definition Functions.h:46
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition Functions.h:238
ERegularization
Enum representing the regularization type applied for a given layer.
Definition Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition Functions.h:57
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition DataLoader.h:40
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition MethodDL.cxx:75
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Bool_t IsNaN(Double_t x)
Definition TMath.h:892
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition TMath.h:756
All of the options that can be specified in the training string.
Definition MethodDL.h:72
std::map< TString, double > optimizerParams
Definition MethodDL.h:84
DNN::EOptimizer optimizer
Definition MethodDL.h:78
DNN::ERegularization regularization
Definition MethodDL.h:77
std::vector< Double_t > dropoutProbabilities
Definition MethodDL.h:83
TMarker m
Definition textangle.C:8
TLine l
Definition textangle.C:4
auto * t1
Definition textangle.C:20
static void output()