Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodDL.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/cnn:$Id$
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * Web : http://tmva.sourceforge.net *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31#include "TObjString.h"
32
33#include "TMVA/Tools.h"
34#include "TMVA/Configurable.h"
35#include "TMVA/IMethod.h"
37#include "TMVA/MethodDL.h"
38#include "TMVA/Types.h"
40#include "TMVA/DNN/Functions.h"
42#include "TMVA/DNN/SGD.h"
43#include "TMVA/DNN/Adam.h"
44#include "TMVA/DNN/Adagrad.h"
45#include "TMVA/DNN/RMSProp.h"
46#include "TMVA/DNN/Adadelta.h"
47#include "TMVA/Timer.h"
48
49#ifdef R__HAS_TMVAGPU
51#ifdef R__HAS_CUDNN
53#endif
54#endif
55
56#include <chrono>
57
60
61using namespace TMVA::DNN::CNN;
62using namespace TMVA::DNN;
63
69
70
71namespace TMVA {
72
73
74////////////////////////////////////////////////////////////////////////////////
75TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
76{
77 key.ToUpper();
78 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
79 if (it == keyValueMap.end()) {
80 return TString("");
81 }
82 return it->second;
83}
84
85////////////////////////////////////////////////////////////////////////////////
// Forward declaration of the typed option fetcher; explicitly specialized
// below for int, double, TString, bool and std::vector<double>.
86template <typename T>
87T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
88
89////////////////////////////////////////////////////////////////////////////////
90template <>
91int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
92{
93 TString value(fetchValueTmp(keyValueMap, key));
94 if (value == "") {
95 return defaultValue;
96 }
97 return value.Atoi();
98}
99
100////////////////////////////////////////////////////////////////////////////////
101template <>
102double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
103{
104 TString value(fetchValueTmp(keyValueMap, key));
105 if (value == "") {
106 return defaultValue;
107 }
108 return value.Atof();
109}
110
111////////////////////////////////////////////////////////////////////////////////
112template <>
113TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
114{
115 TString value(fetchValueTmp(keyValueMap, key));
116 if (value == "") {
117 return defaultValue;
118 }
119 return value;
120}
121
122////////////////////////////////////////////////////////////////////////////////
123template <>
124bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
125{
126 TString value(fetchValueTmp(keyValueMap, key));
127 if (value == "") {
128 return defaultValue;
129 }
130
131 value.ToUpper();
132 if (value == "TRUE" || value == "T" || value == "1") {
133 return true;
134 }
135
136 return false;
137}
138
139////////////////////////////////////////////////////////////////////////////////
140template <>
141std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
142 std::vector<double> defaultValue)
143{
144 TString parseString(fetchValueTmp(keyValueMap, key));
145 if (parseString == "") {
146 return defaultValue;
147 }
148
149 parseString.ToUpper();
150 std::vector<double> values;
151
152 const TString tokenDelim("+");
153 TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
154 TIter nextToken(tokenStrings);
155 TObjString *tokenString = (TObjString *)nextToken();
156 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
157 std::stringstream sstr;
158 double currentValue;
159 sstr << tokenString->GetString().Data();
160 sstr >> currentValue;
161 values.push_back(currentValue);
162 }
163 return values;
164}
165
166////////////////////////////////////////////////////////////////////////////////
// MethodDL::DeclareOptions — the defining signature line (doxygen line 167)
// was a hyperlink lost in the HTML extraction; the body below registers every
// user-configurable option with its default: input/batch/network layouts, the
// loss ("ErrorStrategy"), weight initialization, random seed, validation
// split, backend architecture, and the '|'-separated training strategies.
168{
169 // Set default values for all option strings
170
171 DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
172
173 DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
174
175 DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
176
177 DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
178 " or cross entropy (binary classification).");
179 AddPreDefVal(TString("CROSSENTROPY"));
180 AddPreDefVal(TString("SUMOFSQUARES"));
181 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
182
183 DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
184 AddPreDefVal(TString("XAVIER"));
185 AddPreDefVal(TString("XAVIERUNIFORM"));
186 AddPreDefVal(TString("GAUSS"));
187 AddPreDefVal(TString("UNIFORM"));
188 AddPreDefVal(TString("IDENTITY"));
189 AddPreDefVal(TString("ZERO"));
190
191 DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
192
193 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
194 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
195 "Specify as 100 to use exactly 100 events. (Default: 20%)");
196
197 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
198 AddPreDefVal(TString("STANDARD")); // deprecated and not supported anymore
199 AddPreDefVal(TString("CPU"));
200 AddPreDefVal(TString("GPU"));
201 AddPreDefVal(TString("OPENCL")); // not yet implemented
202 AddPreDefVal(TString("CUDNN")); // not needed (by default GPU is now CUDNN if available)
203
204 // define training strategy separated by a separator "|"
205 DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-3,"
206 "Momentum=0.0,"
207 "ConvergenceSteps=100,"
208 "MaxEpochs=2000,"
209 "Optimizer=ADAM,"
210 "BatchSize=30,"
211 "TestRepetitions=1,"
212 "WeightDecay=0.0,"
213 "Regularization=None,"
214 "DropConfig=0.0",
215 "TrainingStrategy", "Defines the training strategies.");
216}
217
218////////////////////////////////////////////////////////////////////////////////
// MethodDL::ProcessOptions — the defining signature (doxygen line 219) was
// lost in the HTML extraction. The body validates/normalizes the requested
// backend architecture, maps ErrorStrategy + analysis type to a loss/output
// function, parses each '|'-separated training-strategy block into a
// TTrainingSettings, and derives default input/batch layouts when none were
// given.
// NOTE(review): gaps in the embedded doxygen numbering (222, 275-276, 280,
// 313-325, 345-349, 355-367, 405, 419, 424-425) are hyperlinked code lines
// dropped by the extraction — this dump is not compilable as-is.
220{
221
223 Log() << kINFO << "Will ignore negative events in training!" << Endl;
224 }
225
// NOTE(review): the warning below repeats "Architecture=CPU"; the second
// occurrence should presumably read Architecture=GPU — confirm upstream.
226 if (fArchitectureString == "STANDARD") {
227 Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
228 "Please use Architecture=CPU or Architecture=CPU."
229 "See the TMVA Users' Guide for instructions if you "
230 "encounter problems."
231 << Endl;
232 Log() << kINFO << "We will use instead the CPU architecture" << Endl;
233 fArchitectureString = "CPU";
234 }
235 if (fArchitectureString == "OPENCL") {
236 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
237 "Please use Architecture=CPU or Architecture=CPU for the "
238 "time being. See the TMVA Users' Guide for instructions "
239 "if you encounter problems."
240 << Endl;
241 // use instead GPU
242 Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
243 fArchitectureString = "GPU";
244 }
245
246 // the architecture can now be set at runtime as an option
247
248
249 if (fArchitectureString == "GPU" || fArchitectureString == "CUDNN") {
250#ifdef R__HAS_TMVAGPU
251 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
252#else // case TMVA does not support GPU
253 Log() << kERROR << "CUDA backend not enabled. Please make sure "
254 "you have CUDA installed and it was successfully "
255 "detected by CMAKE by using -Dtmva-gpu=On "
256 << Endl;
257 fArchitectureString = "CPU";
258 Log() << kINFO << "Will now use instead the CPU architecture !" << Endl;
259#endif
260 }
261
262 if (fArchitectureString == "CPU") {
263#ifdef R__HAS_TMVACPU // TMVA has CPU BLAS and IMT support
264 Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support !" << Endl;
265#else // TMVA has no CPU BLAS or IMT support
266 Log() << kINFO << "Multi-core CPU backend not enabled. For better performances, make sure "
267 "you have a BLAS implementation and it was successfully "
268 "detected by CMake as well that the imt CMake flag is set."
269 << Endl;
270 Log() << kINFO << "Will use anyway the CPU architecture but with slower performance" << Endl;
271#endif
272 }
273
274 // Input Layout
277
278 // Loss function and output.
// NOTE(review): the opening "if (fAnalysisType == ...)" line (doxygen 280)
// was lost in extraction; the "}" before "else if" on line 288 closes it.
279 fOutputFunction = EOutputFunction::kSigmoid;
281 if (fErrorStrategy == "SUMOFSQUARES") {
282 fLossFunction = ELossFunction::kMeanSquaredError;
283 }
284 if (fErrorStrategy == "CROSSENTROPY") {
285 fLossFunction = ELossFunction::kCrossEntropy;
286 }
287 fOutputFunction = EOutputFunction::kSigmoid;
288 } else if (fAnalysisType == Types::kRegression) {
289 if (fErrorStrategy != "SUMOFSQUARES") {
290 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
291 << " neural net error function. Setting error function to "
292 << " SUMOFSQUARES now." << Endl;
293 }
294
295 fLossFunction = ELossFunction::kMeanSquaredError;
296 fOutputFunction = EOutputFunction::kIdentity;
297 } else if (fAnalysisType == Types::kMulticlass) {
298 if (fErrorStrategy == "SUMOFSQUARES") {
299 fLossFunction = ELossFunction::kMeanSquaredError;
300 }
301 if (fErrorStrategy == "CROSSENTROPY") {
302 fLossFunction = ELossFunction::kCrossEntropy;
303 }
304 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
305 fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
306 }
307 fOutputFunction = EOutputFunction::kSoftmax;
308 }
309
310 // Initialization
311 // the biases will be always initialized to zero
// NOTE(review): the fWeightInitialization assignments inside each branch
// (doxygen 313-325) were hyperlinked enum lines lost in extraction.
312 if (fWeightInitializationString == "XAVIER") {
314 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
316 } else if (fWeightInitializationString == "GAUSS") {
318 } else if (fWeightInitializationString == "UNIFORM") {
320 } else if (fWeightInitializationString == "ZERO") {
322 } else if (fWeightInitializationString == "IDENTITY") {
324 } else {
326 }
327
328 // Training settings.
329
331 for (auto &block : strategyKeyValues) {
332 TTrainingSettings settings;
333
334 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
335 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
336 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
337 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
338 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
339 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
340 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
341 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
342
343 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
344 if (regularization == "L1") {
346 } else if (regularization == "L2") {
348 } else {
350 }
351
352 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
353 settings.optimizerName = optimizer;
354 if (optimizer == "SGD") {
356 } else if (optimizer == "ADAM") {
358 } else if (optimizer == "ADAGRAD") {
360 } else if (optimizer == "RMSPROP") {
362 } else if (optimizer == "ADADELTA") {
364 } else {
365 // Make Adam as default choice if the input string is
366 // incorrect.
368 settings.optimizerName = "ADAM";
369 }
370 // check for specific optimizer parameters
371 std::vector<TString> optimParamLabels = {"_beta1", "_beta2", "_eps", "_rho"};
372 //default values
373 std::map<TString, double> defaultValues = {
374 {"ADADELTA_eps", 1.E-8}, {"ADADELTA_rho", 0.95},
375 {"ADAGRAD_eps", 1.E-8},
376 {"ADAM_beta1", 0.9}, {"ADAM_beta2", 0.999}, {"ADAM_eps", 1.E-7},
377 {"RMSPROP_eps", 1.E-7}, {"RMSPROP_rho", 0.9},
378 };
379 for (auto &pN : optimParamLabels) {
380 TString optimParamName = settings.optimizerName + pN;
381 // check if optimizer has default values for this specific parameters
382 if (defaultValues.count(optimParamName) > 0) {
383 double defValue = defaultValues[optimParamName];
384 double val = fetchValueTmp(block, optimParamName, defValue);
385 // create entry in settings for this optimizer parameter
386 settings.optimizerParams[optimParamName] = val;
387 }
388 }
389
390 fTrainingSettings.push_back(settings);
391 }
392
393 // this set fInputShape[0] = batchSize
394 this->SetBatchSize(fTrainingSettings.front().batchSize);
395
396 // case inputlayout and batch layout was not given. Use default then
397 // (1, batchsize, nvariables)
398 // fInputShape[0] -> BatchSize
399 // fInputShape[1] -> InputDepth
400 // fInputShape[2] -> InputHeight
401 // fInputShape[3] -> InputWidth
402 if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
403 fInputShape[1] = 1;
404 fInputShape[2] = 1;
406 }
407 // case when batch layout is not provided (all zero)
408 // batch layout can be determined by the input layout + batch size
409 // case DNN : { 1, B, W }
410 // case CNN : { B, C, H*W}
411 // case RNN : { B, T, H*W }
412
413 if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
414 // case first layer is DENSE
415 if (fInputShape[2] == 1 && fInputShape[1] == 1) {
416 // case of (1, batchsize, input features)
417 fBatchDepth = 1;
418 fBatchHeight = fTrainingSettings.front().batchSize;
420 }
421 else { // more general cases (e.g. for CNN)
422 // case CONV or RNN
423 fBatchDepth = fTrainingSettings.front().batchSize;
426 }
427 }
428}
429
430////////////////////////////////////////////////////////////////////////////////
431/// default initializations
// MethodDL::Init — default initialization hook, intentionally empty
// (the signature line was lost in the HTML extraction).
433{
434 // Nothing to do here
435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// Parse the input layout
// MethodDL::ParseInputLayout — signature (doxygen line 439) lost in
// extraction. Splits the '|'-separated InputLayout string into a shape
// vector {batch=0, dims...}, padding to 4 entries with 1s for dense/RNN
// layouts, then stores it via SetInputShape.
// NOTE(review): `inputDimStrings` returned by Tokenize is never deleted —
// looks like a small per-call leak; confirm and free it upstream.
440{
441 // Define the delimiter
442 const TString delim("|");
443
444 // Get the input layout string
445 TString inputLayoutString = this->GetInputLayoutString();
446
447 // Split the input layout string
448 TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
449 TIter nextInputDim(inputDimStrings);
450 TObjString *inputDimString = (TObjString *)nextInputDim();
451
452 // Go through every token and save its absolute value in the shape array
453 // The first token is the batch size for easy compatibility with cudnn
454 int subDim = 1;
455 std::vector<size_t> inputShape;
456 inputShape.reserve(inputLayoutString.Length()/2 + 2);
457 inputShape.push_back(0); // Will be set later by Trainingsettings, use 0 value now
458 for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
459 // size_t is unsigned
460 subDim = (size_t) abs(inputDimString->GetString().Atoi());
461 // Size among unused dimensions should be set to 1 for cudnn
462 //if (subDim == 0) subDim = 1;
463 inputShape.push_back(subDim);
464 }
465 // it is expected that empty Shape has at least 4 dimensions. We pad the missing one's with 1
466 // for example in case of dense layer input layouts
467 // when we will support 3D convolutions we would need to add extra 1's
468 if (inputShape.size() == 2) {
469 // case of dense layer where only width is specified
470 inputShape.insert(inputShape.begin() + 1, {1,1});
471 }
472 else if (inputShape.size() == 3) {
473 //e.g. case of RNN T,W -> T,1,W
474 inputShape.insert(inputShape.begin() + 2, 1);
475 }
476
477 this->SetInputShape(inputShape);
478}
479
480////////////////////////////////////////////////////////////////////////////////
481/// Parse the input layout
// MethodDL::ParseBatchLayout — signature (doxygen line 482) lost in
// extraction. Parses the '|'-separated "depth|height|width" BatchLayout
// string and stores the three values via the corresponding setters.
// NOTE(review): `batchDimStrings` from Tokenize is never deleted — same
// apparent leak pattern as ParseInputLayout; confirm upstream.
483{
484 // Define the delimiter
485 const TString delim("|");
486
487 // Get the input layout string
488 TString batchLayoutString = this->GetBatchLayoutString();
489
490 size_t batchDepth = 0;
491 size_t batchHeight = 0;
492 size_t batchWidth = 0;
493
494 // Split the input layout string
495 TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
496 TIter nextBatchDim(batchDimStrings);
497 TObjString *batchDimString = (TObjString *)nextBatchDim();
498 int idxToken = 0;
499
500 for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
501 switch (idxToken) {
502 case 0: // input depth
503 {
504 TString strDepth(batchDimString->GetString());
505 batchDepth = (size_t)strDepth.Atoi();
506 } break;
507 case 1: // input height
508 {
509 TString strHeight(batchDimString->GetString());
510 batchHeight = (size_t)strHeight.Atoi();
511 } break;
512 case 2: // input width
513 {
514 TString strWidth(batchDimString->GetString());
515 batchWidth = (size_t)strWidth.Atoi();
516 } break;
517 }
518 ++idxToken;
519 }
520
521 this->SetBatchDepth(batchDepth);
522 this->SetBatchHeight(batchHeight);
523 this->SetBatchWidth(batchWidth);
524}
525
526////////////////////////////////////////////////////////////////////////////////
527/// Create a deep net based on the layout string
// MethodDL::CreateDeepNet — the signature lines (doxygen 529-530, taking
// `deepNet` and `nets` by reference) were lost in extraction. Splits the
// Layout option on ',' into per-layer specs, dispatches each to the matching
// Parse*Layer helper on its leading type token, defaulting to a dense layer
// for backward compatibility with the old DNN interface.
528template <typename Architecture_t, typename Layer_t>
531{
532 // Layer specification, layer details
533 const TString layerDelimiter(",");
534 const TString subDelimiter("|");
535
536 TString layoutString = this->GetLayoutString();
537
538 //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
539
540 // Split layers
541 TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
542 TIter nextLayer(layerStrings);
543 TObjString *layerString = (TObjString *)nextLayer();
544
545
546 for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
547
548 // Split layer details
549 TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
550 TIter nextToken(subStrings);
551 TObjString *token = (TObjString *)nextToken();
552
553 // Determine the type of the layer
554 TString strLayerType = token->GetString();
555
556
557 if (strLayerType == "DENSE") {
558 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
559 } else if (strLayerType == "CONV") {
560 ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
561 } else if (strLayerType == "MAXPOOL") {
562 ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
563 } else if (strLayerType == "RESHAPE") {
564 ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
565 } else if (strLayerType == "BNORM") {
566 ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
567 } else if (strLayerType == "RNN") {
568 ParseRecurrentLayer(kLayerRNN, deepNet, nets, layerString->GetString(), subDelimiter);
569 } else if (strLayerType == "LSTM") {
570 ParseRecurrentLayer(kLayerLSTM, deepNet, nets, layerString->GetString(), subDelimiter);
571 } else if (strLayerType == "GRU") {
572 ParseRecurrentLayer(kLayerGRU, deepNet, nets, layerString->GetString(), subDelimiter);
573 } else {
574 // no type of layer specified - assume is dense layer as in old DNN interface
575 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
576 }
577 }
578}
579
580////////////////////////////////////////////////////////////////////////////////
581/// Pases the layer string and creates the appropriate dense layer
// MethodDL::ParseDenseLayer — the first signature line (doxygen 583, with the
// `deepNet` parameter) was lost in extraction. Parses a "DENSE|width|actfn"
// spec (token order is free), where width may be a TFormula expression in
// N/n (number of input variables); width 0 means "use the output size".
// Adds the layer to `deepNet` and, when fBuildNet is set, to fNet as well.
582template <typename Architecture_t, typename Layer_t>
584 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
585 TString delim)
586{
587 int width = 0;
588 EActivationFunction activationFunction = EActivationFunction::kTanh;
589
590 // this return number of input variables for the method
591 // it can be used to deduce width of dense layer if specified as N+10
592 // where N is the number of input variables
593 const size_t inputSize = GetNvar();
594
595 // Split layer details
596 TObjArray *subStrings = layerString.Tokenize(delim);
597 TIter nextToken(subStrings);
598 TObjString *token = (TObjString *)nextToken();
599 int idxToken = 0;
600
601 // loop on the tokens
602 // order of sepcifying width and activation function is not relevant
603 // both 100|TANH and TANH|100 are valid cases
604 for (; token != nullptr; token = (TObjString *)nextToken()) {
605 idxToken++;
606 // try a match with the activation function
607 TString strActFnc(token->GetString());
608 // if first token defines the layer type- skip it
609 if (strActFnc =="DENSE") continue;
610
611 if (strActFnc == "RELU") {
612 activationFunction = DNN::EActivationFunction::kRelu;
613 } else if (strActFnc == "TANH") {
614 activationFunction = DNN::EActivationFunction::kTanh;
615 } else if (strActFnc == "FTANH") {
616 activationFunction = DNN::EActivationFunction::kFastTanh;
617 } else if (strActFnc == "SYMMRELU") {
618 activationFunction = DNN::EActivationFunction::kSymmRelu;
619 } else if (strActFnc == "SOFTSIGN") {
620 activationFunction = DNN::EActivationFunction::kSoftSign;
621 } else if (strActFnc == "SIGMOID") {
622 activationFunction = DNN::EActivationFunction::kSigmoid;
623 } else if (strActFnc == "LINEAR") {
624 activationFunction = DNN::EActivationFunction::kIdentity;
625 } else if (strActFnc == "GAUSS") {
626 activationFunction = DNN::EActivationFunction::kGauss;
627 } else if (width == 0) {
628 // no match found try to parse as text showing the width
629 // support for input a formula where the variable 'x' is 'N' in the string
630 // use TFormula for the evaluation
631 TString strNumNodes = strActFnc;
632 // number of nodes
633 TString strN("x");
634 strNumNodes.ReplaceAll("N", strN);
635 strNumNodes.ReplaceAll("n", strN);
636 TFormula fml("tmp", strNumNodes);
637 width = fml.Eval(inputSize);
638 }
639 }
640 // avoid zero width. assume is last layer and give width = output width
641 // Determine the number of outputs
// NOTE(review): the opening regression branch line (doxygen 643) was lost in
// extraction; the "}" before "else if" on line 645 closes it.
642 size_t outputSize = 1;
644 outputSize = GetNTargets();
645 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
646 outputSize = DataInfo().GetNClasses();
647 }
648 if (width == 0) width = outputSize;
649
650 // Add the dense layer, initialize the weights and biases and copy
651 TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
652 denseLayer->Initialize();
653
654 // add same layer to fNet
655 if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
656
657 //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
658
659 // add the copy to all slave nets
660 //for (size_t i = 0; i < nets.size(); i++) {
661 // nets[i].AddDenseLayer(copyDenseLayer);
662 //}
663
664 // check compatibility of added layer
665 // for a dense layer input should be 1 x 1 x DxHxW
666}
667
668////////////////////////////////////////////////////////////////////////////////
669/// Pases the layer string and creates the appropriate convolutional layer
// MethodDL::ParseConvLayer — first signature line (doxygen 671) lost in
// extraction. Parses a positional "CONV|depth|fltH|fltW|strideR|strideC|
// padH|padW|actfn" spec and adds the convolutional layer to `deepNet`
// (and fNet when fBuildNet is set).
670template <typename Architecture_t, typename Layer_t>
672 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
673 TString delim)
674{
675 int depth = 0;
676 int fltHeight = 0;
677 int fltWidth = 0;
678 int strideRows = 0;
679 int strideCols = 0;
680 int zeroPadHeight = 0;
681 int zeroPadWidth = 0;
682 EActivationFunction activationFunction = EActivationFunction::kTanh;
683
684 // Split layer details
685 TObjArray *subStrings = layerString.Tokenize(delim);
686 TIter nextToken(subStrings);
687 TObjString *token = (TObjString *)nextToken();
688 int idxToken = 0;
689
690 for (; token != nullptr; token = (TObjString *)nextToken()) {
691 switch (idxToken) {
692 case 1: // depth
693 {
694 TString strDepth(token->GetString());
695 depth = strDepth.Atoi();
696 } break;
697 case 2: // filter height
698 {
699 TString strFltHeight(token->GetString());
700 fltHeight = strFltHeight.Atoi();
701 } break;
702 case 3: // filter width
703 {
704 TString strFltWidth(token->GetString());
705 fltWidth = strFltWidth.Atoi();
706 } break;
707 case 4: // stride in rows
708 {
709 TString strStrideRows(token->GetString());
710 strideRows = strStrideRows.Atoi();
711 } break;
712 case 5: // stride in cols
713 {
714 TString strStrideCols(token->GetString());
715 strideCols = strStrideCols.Atoi();
716 } break;
717 case 6: // zero padding height
718 {
719 TString strZeroPadHeight(token->GetString());
720 zeroPadHeight = strZeroPadHeight.Atoi();
721 } break;
722 case 7: // zero padding width
723 {
724 TString strZeroPadWidth(token->GetString());
725 zeroPadWidth = strZeroPadWidth.Atoi();
726 } break;
727 case 8: // activation function
728 {
729 TString strActFnc(token->GetString());
730 if (strActFnc == "RELU") {
731 activationFunction = DNN::EActivationFunction::kRelu;
732 } else if (strActFnc == "TANH") {
733 activationFunction = DNN::EActivationFunction::kTanh;
734 } else if (strActFnc == "SYMMRELU") {
735 activationFunction = DNN::EActivationFunction::kSymmRelu;
736 } else if (strActFnc == "SOFTSIGN") {
737 activationFunction = DNN::EActivationFunction::kSoftSign;
738 } else if (strActFnc == "SIGMOID") {
739 activationFunction = DNN::EActivationFunction::kSigmoid;
740 } else if (strActFnc == "LINEAR") {
741 activationFunction = DNN::EActivationFunction::kIdentity;
742 } else if (strActFnc == "GAUSS") {
743 activationFunction = DNN::EActivationFunction::kGauss;
744 }
745 } break;
746 }
747 ++idxToken;
748 }
749
750 // Add the convolutional layer, initialize the weights and biases and copy
751 TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
752 zeroPadHeight, zeroPadWidth, activationFunction);
753 convLayer->Initialize();
754
755 // Add same layer to fNet
756 if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
757 zeroPadHeight, zeroPadWidth, activationFunction);
758
759 //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
760
761 //// add the copy to all slave nets
762 //for (size_t i = 0; i < nets.size(); i++) {
763 // nets[i].AddConvLayer(copyConvLayer);
764 //}
765}
766
767////////////////////////////////////////////////////////////////////////////////
768/// Pases the layer string and creates the appropriate max pool layer
// MethodDL::ParseMaxPoolLayer — first signature line (doxygen 770) lost in
// extraction. Parses a positional "MAXPOOL|fltH|fltW|strideR|strideC" spec
// and adds the max-pooling layer to `deepNet` (and fNet when fBuildNet).
769template <typename Architecture_t, typename Layer_t>
771 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
772 TString delim)
773{
774
775 int filterHeight = 0;
776 int filterWidth = 0;
777 int strideRows = 0;
778 int strideCols = 0;
779
780 // Split layer details
781 TObjArray *subStrings = layerString.Tokenize(delim);
782 TIter nextToken(subStrings);
783 TObjString *token = (TObjString *)nextToken();
784 int idxToken = 0;
785
786 for (; token != nullptr; token = (TObjString *)nextToken()) {
787 switch (idxToken) {
788 case 1: // filter height
789 {
790 TString strFrmHeight(token->GetString());
791 filterHeight = strFrmHeight.Atoi();
792 } break;
793 case 2: // filter width
794 {
795 TString strFrmWidth(token->GetString());
796 filterWidth = strFrmWidth.Atoi();
797 } break;
798 case 3: // stride in rows
799 {
800 TString strStrideRows(token->GetString());
801 strideRows = strStrideRows.Atoi();
802 } break;
803 case 4: // stride in cols
804 {
805 TString strStrideCols(token->GetString());
806 strideCols = strStrideCols.Atoi();
807 } break;
808 }
809 ++idxToken;
810 }
811
812 // Add the Max pooling layer
813 // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
814 deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
815
816 // Add the same layer to fNet
817 if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
818
819
820 //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
821
822 //// add the copy to all slave nets
823 //for (size_t i = 0; i < nets.size(); i++) {
824 // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
825 //}
826}
827
828////////////////////////////////////////////////////////////////////////////////
829/// Pases the layer string and creates the appropriate reshape layer
// MethodDL::ParseReshapeLayer — first signature line (doxygen 831) lost in
// extraction. Parses "RESHAPE|depth|height|width" or "RESHAPE|FLAT" (the
// FLAT token jumps straight to the flattening case) and adds the reshape
// layer to `deepNet` (and fNet when fBuildNet).
830template <typename Architecture_t, typename Layer_t>
832 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
833 TString delim)
834{
835 int depth = 0;
836 int height = 0;
837 int width = 0;
838 bool flattening = false;
839
840 // Split layer details
841 TObjArray *subStrings = layerString.Tokenize(delim);
842 TIter nextToken(subStrings);
843 TObjString *token = (TObjString *)nextToken();
844 int idxToken = 0;
845
846 for (; token != nullptr; token = (TObjString *)nextToken()) {
847 if (token->GetString() == "FLAT") idxToken=4;
848 switch (idxToken) {
849 case 1: {
850 TString strDepth(token->GetString());
851 depth = strDepth.Atoi();
852 } break;
853 case 2: // height
854 {
855 TString strHeight(token->GetString());
856 height = strHeight.Atoi();
857 } break;
858 case 3: // width
859 {
860 TString strWidth(token->GetString());
861 width = strWidth.Atoi();
862 } break;
863 case 4: // flattening
864 {
865 TString flat(token->GetString());
866 if (flat == "FLAT") {
867 flattening = true;
868 }
869 } break;
870 }
871 ++idxToken;
872 }
873
874 // Add the reshape layer
875 // TReshapeLayer<Architecture_t> *reshapeLayer =
876 deepNet.AddReshapeLayer(depth, height, width, flattening);
877
878 // Add the same layer to fNet
879 if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
880
881 //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
882
883 //// add the copy to all slave nets
884 //for (size_t i = 0; i < nets.size(); i++) {
885 // nets[i].AddReshapeLayer(copyReshapeLayer);
886 //}
887}
888
889////////////////////////////////////////////////////////////////////////////////
890/// Pases the layer string and creates the appropriate reshape layer
// MethodDL::ParseBatchNormLayer — first signature line (doxygen 892) lost in
// extraction. Parses "BNORM|momentum|epsilon" (momentum defaults to -1,
// epsilon to 1e-4) and adds the batch-normalization layer to `deepNet`
// (and fNet when fBuildNet).
891template <typename Architecture_t, typename Layer_t>
893 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
894 TString delim)
895{
896
897 // default values
898 double momentum = -1; //0.99;
899 double epsilon = 0.0001;
900
901 // Split layer details
902 TObjArray *subStrings = layerString.Tokenize(delim);
903 TIter nextToken(subStrings);
904 TObjString *token = (TObjString *)nextToken();
905 int idxToken = 0;
906
907 for (; token != nullptr; token = (TObjString *)nextToken()) {
908 switch (idxToken) {
909 case 1: {
910 momentum = std::atof(token->GetString().Data());
911 } break;
912 case 2: // height
913 {
914 epsilon = std::atof(token->GetString().Data());
915 } break;
916 }
917 ++idxToken;
918 }
919
920 // Add the batch norm layer
921 //
922 auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
923 layer->Initialize();
924
925 // Add the same layer to fNet
926 if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
927
928}
929
 930////////////////////////////////////////////////////////////////////////////////
 931/// Parses the layer string and creates the appropriate recurrent layer
/// (vanilla RNN, LSTM or GRU — selected by the rnnType argument from the
/// function signature). Token 0 is the layer-type keyword and is skipped.
 932template <typename Architecture_t, typename Layer_t>
 934                                    std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
 935                                    TString delim)
 936{
 937   // int depth = 0;
   // defaults for the delim-separated option tokens parsed below
 938   int stateSize = 0;
 939   int inputSize = 0;
 940   int timeSteps = 0;
 941   bool rememberState = false;
 942   bool returnSequence = false;
 943   bool resetGateAfter = false;
 944
 945   // Split layer details
 946   TObjArray *subStrings = layerString.Tokenize(delim);
 947   TIter nextToken(subStrings);
 948   TObjString *token = (TObjString *)nextToken();
 949   int idxToken = 0;
 950
 951   for (; token != nullptr; token = (TObjString *)nextToken()) {
 952      switch (idxToken) {
 953      case 1: // state size
 954      {
 955         TString strstateSize(token->GetString());
 956         stateSize = strstateSize.Atoi();
 957         break;
 958      }
 959      case 2: // input size
 960      {
 961         TString strinputSize(token->GetString());
 962         inputSize = strinputSize.Atoi();
 963         break;
 964      }
 965      case 3: // time steps
 966      {
 967         TString strtimeSteps(token->GetString());
 968         timeSteps = strtimeSteps.Atoi();
 969         break;
 970      }
 971      case 4: // rememberState (option "stateful" in Keras)
 972      {
 973         TString strrememberState(token->GetString());
 974         rememberState = (bool) strrememberState.Atoi();
 975         break;
 976      }
 977      case 5: // return full output sequence (1 or 0)
 978      {
 979         TString str(token->GetString());
 980         returnSequence = (bool)str.Atoi();
 981         break;
 982      }
 983      case 6: // resetGate after option (only for GRU) — last case, no break needed
 984      {
 985         TString str(token->GetString());
 986         resetGateAfter = (bool)str.Atoi();
 987      }
 988      }
 989      ++idxToken;
 990   }
 991
 992   // Add the recurrent layer, initialize the weights and biases and copy
 993   if (rnnType == kLayerRNN) {
 994      auto * recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
 995      recurrentLayer->Initialize();
 996      // Add same layer to fNet
 997      if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
 998   }
 999   else if (rnnType == kLayerLSTM ) {
1000      auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1001      recurrentLayer->Initialize();
1002      // Add same layer to fNet
1003      if (fBuildNet)
1004         fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1005   }
1006   else if (rnnType == kLayerGRU) {
      // cuDNN's GRU implementation supports only the reset-gate-after variant
1007      if (Architecture_t::IsCudnn()) resetGateAfter = true; // needed for Cudnn
1008      auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1009      recurrentLayer->Initialize();
1010      // Add same layer to fNet
1011      if (fBuildNet)
1012         fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1013   }
1014   else {
1015      Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
1016   }
1017}
1018
1019////////////////////////////////////////////////////////////////////////////////
1020/// Standard constructor.
1021MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
1022 : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1023 fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1024 fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1025 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1026 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1027 fXInput()
1028{
1029 // Nothing to do here
1030}
1031
1032////////////////////////////////////////////////////////////////////////////////
1033/// Constructor from a weight file.
1034MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
1035 : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1036 fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1037 fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1038 fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1039 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1040 fXInput()
1041{
1042 // Nothing to do here
1043}
1044
1045////////////////////////////////////////////////////////////////////////////////
1046/// Destructor. Owned resources (e.g. the fNet/fYHat unique_ptr members) are
/// released automatically by the member destructors.
1048{
1049   // Nothing to do here
1050}
1051
1052////////////////////////////////////////////////////////////////////////////////
1053/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1054auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1055{
1056 // remove empty spaces
1057 parseString.ReplaceAll(" ","");
1058 KeyValueVector_t blockKeyValues;
1059 const TString keyValueDelim("=");
1060
1061 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1062 TIter nextBlock(blockStrings);
1063 TObjString *blockString = (TObjString *)nextBlock();
1064
1065 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1066 blockKeyValues.push_back(std::map<TString, TString>());
1067 std::map<TString, TString> &currentBlock = blockKeyValues.back();
1068
1069 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1070 TIter nextToken(subStrings);
1071 TObjString *token = (TObjString *)nextToken();
1072
1073 for (; token != nullptr; token = (TObjString *)nextToken()) {
1074 TString strKeyValue(token->GetString());
1075 int delimPos = strKeyValue.First(keyValueDelim.Data());
1076 if (delimPos <= 0) continue;
1077
1078 TString strKey = TString(strKeyValue(0, delimPos));
1079 strKey.ToUpper();
1080 TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1081
1082 strKey.Strip(TString::kBoth, ' ');
1083 strValue.Strip(TString::kBoth, ' ');
1084
1085 currentBlock.insert(std::make_pair(strKey, strValue));
1086 }
1087 }
1088 return blockKeyValues;
1089}
1090
1091////////////////////////////////////////////////////////////////////////////////
1092/// What kind of analysis type can handle the CNN
/// Returns kTRUE for two-class classification, for multiclass classification
/// and for regression; kFALSE for anything else.
1094{
1095   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1096   if (type == Types::kMulticlass) return kTRUE;
1097   if (type == Types::kRegression) return kTRUE;
1098
1099   return kFALSE;
1100}
1101
1102////////////////////////////////////////////////////////////////////////////////
1103/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1104/// 100 etc.
1105/// - 20% and 0.2 selects 20% of the training set as validation data.
1106/// - 100 selects 100 events as the validation data.
1107///
1108/// @return number of samples in validation set
1109///
1111{
1112   Int_t nValidationSamples = 0;
1113   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1114
1115   // Parsing + Validation
1116   // --------------------
1117   if (fNumValidationString.EndsWith("%")) {
1118      // Relative spec. format 20%
1119      TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1120
1121      if (intValStr.IsFloat()) {
1122         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
         // fraction of the training set; truncated towards zero by the int assignment
1123         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1124      } else {
1125         Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1126               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1127      }
1128   } else if (fNumValidationString.IsFloat()) {
1129      Double_t valSizeAsDouble = fNumValidationString.Atof();
1130
1131      if (valSizeAsDouble < 1.0) {
1132         // Relative spec. format 0.2
1133         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1134      } else {
1135         // Absolute spec format 100 or 100.0
1136         nValidationSamples = valSizeAsDouble;
1137      }
1138   } else {
1139      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1140            << Endl;
1141   }
1142
1143   // Value validation
1144   // ----------------
   // each bad value is reported as kFATAL (aborts further processing)
1145   if (nValidationSamples < 0) {
1146      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1147   }
1148
1149   if (nValidationSamples == 0) {
1150      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1151   }
1152
1153   if (nValidationSamples >= (Int_t)trainingSetSize) {
1154      Log() << kFATAL << "Validation size \"" << fNumValidationString
1155            << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1156   }
1157
1158   return nValidationSamples;
1159}
1160
1161
1162////////////////////////////////////////////////////////////////////////////////
1163/// Implementation of architecture specific train method
1164///
/// Runs one full training pass per entry of GetTrainingSettings(): builds the
/// architecture-specific deepNet (and, in phase 1, the evaluation net fNet),
/// creates the optimizer selected in the settings, iterates epochs until
/// convergence or maxEpochs, and copies the best-so-far weights (lowest
/// validation loss) from deepNet into fNet.
1165template <typename Architecture_t>
1167{
1168
1169   using Scalar_t = typename Architecture_t::Scalar_t;
1172   using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1173
1174   bool debug = Log().GetMinType() == kDEBUG;
1175
1176
1177   // set the random seed for weight initialization
1178   Architecture_t::SetRandomSeed(fRandomSeed);
1179
1180   ///split training data in training and validation data
1181   // and determine the number of training and testing examples
1182
1183   size_t nValidationSamples = GetNumValidationSamples();
1184   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1185
   // validation events are taken from the tail of the training collection
1186   const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1187   const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1188   const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1189
1190   size_t trainingPhase = 1;
1191
1192   for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1193
1194      size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1195
1196
1197      // After the processing of the options, initialize the master deep net
1198      size_t batchSize = settings.batchSize;
1199      this->SetBatchSize(batchSize);
1200      // Should be replaced by actual implementation. No support for this now.
1201      size_t inputDepth = this->GetInputDepth();
1202      size_t inputHeight = this->GetInputHeight();
1203      size_t inputWidth = this->GetInputWidth();
1204      size_t batchDepth = this->GetBatchDepth();
1205      size_t batchHeight = this->GetBatchHeight();
1206      size_t batchWidth = this->GetBatchWidth();
1207      ELossFunction J = this->GetLossFunction();
1209      ERegularization R = settings.regularization;
1210      EOptimizer O = settings.optimizer;
1211      Scalar_t weightDecay = settings.weightDecay;
1212
1213      //Batch size should be included in batch layout as well. There are two possibilities:
1214      //  1.  Batch depth = batch size   one will input tensors as (batch_size x d1 x d2)
1215      //          This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1216      //  2.  Batch depth = 1, batch height = batch size   batch width = dim of input features
1217      //          This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1218
1219      if (batchDepth != batchSize && batchDepth > 1) {
1220         Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1221         return;
1222      }
1223      if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1224         Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1225         return;
1226      }
1227
1228
1229      //check also that input layout compatible with batch layout
1230      bool badLayout = false;
1231      // case batch depth == batch size
1232      if (batchDepth == batchSize)
1233         badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1234      // case batch Height is batch size
1235      if (batchHeight == batchSize && batchDepth == 1)
1236         badLayout |= ( inputDepth * inputHeight * inputWidth !=  batchWidth);
1237      if (badLayout) {
1238         Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1239               inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1240         return;
1241      }
1242
1243      // check batch size is compatible with number of events
1244      if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1245         Log() << kFATAL << "Number of samples in the datasets are train: ("
1246               << nTrainingSamples << ") test: (" << nValidationSamples
1247               << "). One of these is smaller than the batch size of "
1248               << settings.batchSize << ". Please increase the batch"
1249               << " size to be at least the same size as the smallest"
1250               << " of them." << Endl;
1251      }
1252
1253      DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1254
1255      // create a copy of DeepNet for evaluating but with batch size = 1
1256      // fNet is the saved network and will be with CPU or Reference architecture
1257      if (trainingPhase == 1) {
1258         fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1259                                                                 batchHeight, batchWidth, J, I, R, weightDecay));
1260         fBuildNet = true;
1261      }
1262      else
1263         fBuildNet = false;
1264
1265      // Initialize the vector of slave nets
1266      std::vector<DeepNet_t> nets{};
1267      nets.reserve(nThreads);
1268      for (size_t i = 0; i < nThreads; i++) {
1269         // create copies of the master deep net
1270         nets.push_back(deepNet);
1271      }
1272
1273
1274      // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1275      CreateDeepNet(deepNet, nets);
1276
1277
1278      // set dropout probabilities
1279      // use convention to store in the layer 1.- dropout probabilities
1280      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1281      for (auto & p : dropoutVector) {
1282         p = 1.0 - p;
1283      }
1284      deepNet.SetDropoutProbabilities(dropoutVector);
1285
1286      if (trainingPhase > 1) {
1287         // copy initial weights from fNet to deepnet
1288         for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1289            deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1290         }
1291      }
1292
1293      // when fNet is built create also input matrix that will be used to evaluate it
1294      if (fBuildNet) {
1295         //int n1 = batchHeight;
1296         //int n2 = batchWidth;
1297         // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1298         //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1299         //fXInput = TensorImpl_t(1,n1,n2);
1301         if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1302            fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1303         fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );
1304
1305
1306         // create pointer to output matrix used for the predictions
1307         fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1308
1309         // print the created network
1310         Log() << "***** Deep Learning Network *****" << Endl;
1311         if (Log().GetMinType() <= kINFO)
1312            deepNet.Print();
1313      }
1314      Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1315
1316      // Loading the training and validation datasets
1317      TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1318      TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1319                                      {inputDepth, inputHeight, inputWidth},
1320                                      {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1321                                      deepNet.GetOutputWidth(), nThreads);
1322
1323      TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1324      TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1325                                        {inputDepth, inputHeight, inputWidth},
1326                                        { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1327                                        deepNet.GetOutputWidth(), nThreads);
1328
1329
1330
1331      // do an evaluation of the network to compute initial minimum test error
1332
1333      Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1334
1335      Double_t minValError = 0.0;
1336      Log() << "Compute initial loss  on the validation data " << Endl;
1337      for (auto batch : validationData) {
1338         auto inputTensor = batch.GetInput();
1339         auto outputMatrix = batch.GetOutput();
1340         auto weights = batch.GetWeights();
1341
1342         //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1343         // should we apply droput to the loss ??
1344         minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1345      }
1346      // add Regularization term
1347      Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1348      minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1349      minValError += regzTerm;
1350
1351
1352      // create a pointer to base class VOptimizer
1353      std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1354
1355      // initialize the base class pointer with the corresponding derived class object.
1356      switch (O) {
1357
1358      case EOptimizer::kSGD:
1359         optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1360            new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1361         break;
1362
1363      case EOptimizer::kAdam: {
1364         optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1366            deepNet, settings.learningRate, settings.optimizerParams["ADAM_beta1"],
1367            settings.optimizerParams["ADAM_beta2"], settings.optimizerParams["ADAM_eps"]));
1368         break;
1369      }
1370
1371      case EOptimizer::kAdagrad:
1372         optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1373            new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1374                                                                  settings.optimizerParams["ADAGRAD_eps"]));
1375         break;
1376
1377      case EOptimizer::kRMSProp:
1378         optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1379            new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum,
1380                                                                  settings.optimizerParams["RMSPROP_rho"],
1381                                                                  settings.optimizerParams["RMSPROP_eps"]));
1382         break;
1383
1384      case EOptimizer::kAdadelta:
1385         optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1386            new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1387                                                                   settings.optimizerParams["ADADELTA_rho"],
1388                                                                   settings.optimizerParams["ADADELTA_eps"]));
1389         break;
1390      }
1391
1392
1393      // Initialize the vector of batches, one batch for one slave network
1394      std::vector<TTensorBatch<Architecture_t>> batches{};
1395
1396      bool converged = false;
1397      size_t convergenceCount = 0;
      // integer division: leftover events that do not fill a full batch are skipped each epoch
1398      size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1399
1400      // start measuring
1401      std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1402      tstart = std::chrono::system_clock::now();
1403
1404      // function building string with optimizer parameters values for logging
1405      auto optimParametersString = [&]() {
1406         TString optimParameters;
1407         for ( auto & element :  settings.optimizerParams) {
1408            TString key = element.first;
1409            key.ReplaceAll(settings.optimizerName + "_", ""); // strip optimizerName_
1410            double value = element.second;
1411            if (!optimParameters.IsNull())
1412               optimParameters += ",";
1413            else
1414               optimParameters += " (";
1415            optimParameters += TString::Format("%s=%g", key.Data(), value);
1416         }
1417         if (!optimParameters.IsNull())
1418            optimParameters += ")";
1419         return optimParameters;
1420      };
1421
1422      Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1423            <<  " Optimizer " << settings.optimizerName
1424            << optimParametersString()
1425            << " Learning rate = " << settings.learningRate << " regularization " << (char)settings.regularization
1426            << " minimum error = " << minValError << Endl;
1427      if (!fInteractive) {
1428         std::string separator(62, '-');
1429         Log() << separator << Endl;
1430         Log() << std::setw(10) << "Epoch"
1431               << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err." << std::setw(12)
1432               << "t(s)/epoch" << std::setw(12) << "t(s)/Loss" << std::setw(12) << "nEvents/s" << std::setw(12)
1433               << "Conv. Steps" << Endl;
1434         Log() << separator << Endl;
1435      }
1436
1437      // set up generator for shuffling the batches
1438      // if seed is zero we have always a different order in the batches
1439      size_t shuffleSeed = 0;
1440      if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1441      RandomGenerator<TRandom3> rng(shuffleSeed);
1442
1443      // print weights before
1444      if (fBuildNet && debug) {
1445         Log() << "Initial Deep Net Weights " << Endl;
1446         auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1447         for (size_t l = 0; l < weights_tensor.size(); ++l)
1448            weights_tensor[l].Print();
1449         auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1450         bias_tensor[0].Print();
1451      }
1452
1453      Log() << " Start epoch iteration ..." << Endl;
1454      bool debugFirstEpoch = false;
1455      bool computeLossInTraining = true;  // compute loss in training or at test time
1456      size_t nTrainEpochs = 0;
1457      while (!converged) {
1458         nTrainEpochs++;
1459         trainingData.Shuffle(rng);
1460
1461         // execute all epochs
1462         //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1463
1464         Double_t trainingError = 0;
1465         for (size_t i = 0; i < batchesInEpoch; ++i ) {
1466            // Clean and load new batches, one batch for one slave net
1467            //batches.clear();
1468            //batches.reserve(nThreads);
1469            //for (size_t j = 0; j < nThreads; j++) {
1470            //   batches.push_back(trainingData.GetTensorBatch());
1471            //}
1472            if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1473
1474            auto my_batch = trainingData.GetTensorBatch();
1475
1476            if (debugFirstEpoch)
1477               std::cout << "got batch data - doing forward \n";
1478
1479#ifdef DEBUG
1480
1481            Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1482            typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1483            typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1484            Architecture_t::PrintTensor(tOut,"label tensor",true) ;
1485            Architecture_t::PrintTensor(tW,"weight tensor",true) ;
1486#endif
1487
1488            deepNet.Forward(my_batch.GetInput(), true);
1489            // compute also loss
1490            if (computeLossInTraining) {
1491               auto outputMatrix = my_batch.GetOutput();
1492               auto weights = my_batch.GetWeights();
1493               trainingError += deepNet.Loss(outputMatrix, weights, false);
1494            }
1495
1496            if (debugFirstEpoch)
1497               std::cout << "- doing backward \n";
1498
1499#ifdef DEBUG
1500            size_t nlayers = deepNet.GetLayers().size();
1501            for (size_t l = 0; l < nlayers; ++l) {
1502               if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1503                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1504                                              TString::Format("initial weights layer %d", l).Data());
1505
1506               Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1507                                           TString::Format("output tensor layer %d", l).Data());
1508            }
1509#endif
1510
1511            //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1512
1513            deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1514
1515            if (debugFirstEpoch)
1516               std::cout << "- doing optimizer update \n";
1517
1518            // increment optimizer step that is used in some algorithms (e.g. ADAM)
1519            optimizer->IncrementGlobalStep();
1520            optimizer->Step();
1521
1522#ifdef DEBUG
1523            std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1524            for (size_t l = 0; l < nlayers; ++l) {
1525               if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1526                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1527                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1528               }
1529            }
1530#endif
1531
1532         }
1533
1534         if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
1535         //}
1536         debugFirstEpoch = false;
1537         if ((nTrainEpochs % settings.testInterval) == 0) {
1538
1539            std::chrono::time_point<std::chrono::system_clock> t1,t2;
1540
1541            t1 = std::chrono::system_clock::now();
1542
1543            // Compute validation error.
1544
1545
1546            Double_t valError = 0.0;
1547            bool inTraining = false;
1548            for (auto batch : validationData) {
1549               auto inputTensor = batch.GetInput();
1550               auto outputMatrix = batch.GetOutput();
1551               auto weights = batch.GetWeights();
1552               // should we apply droput to the loss ??
1553               valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1554            }
1555            // normalize loss to number of batches and add regularization term
1556            Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1557            valError /= (Double_t)(nValidationSamples / settings.batchSize);
1558            valError += regTerm;
1559
1560            //Log the loss value
1561            fTrainHistory.AddValue("valError",nTrainEpochs,valError);
1562
1563            t2 = std::chrono::system_clock::now();
1564
1565            // checking for convergence
1566            if (valError < minValError) {
1567               convergenceCount = 0;
1568            } else {
1569               convergenceCount += settings.testInterval;
1570            }
1571
1572            // copy configuration when reached a minimum error
1573            if (valError < minValError ) {
1574               // Copy weights from deepNet to fNet
1575               Log() << std::setw(10) << nTrainEpochs
1576                     << " Minimum Test error found - save the configuration " << Endl;
1577               for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1578                  fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1579                  // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1580                  //    Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1581                  //    Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1582                  // }
1583               }
1584               // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1585               // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1586
1587               minValError = valError;
1588            }
            // if the initial estimate was not positive, adopt the current value as baseline
1589            else if ( minValError <= 0. )
1590               minValError = valError;
1591
1592            if (!computeLossInTraining) {
1593               trainingError = 0.0;
1594               // Compute training error.
1595               for (auto batch : trainingData) {
1596                  auto inputTensor = batch.GetInput();
1597                  auto outputMatrix = batch.GetOutput();
1598                  auto weights = batch.GetWeights();
1599                  trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1600               }
1601            }
1602            // normalize loss to number of batches and add regularization term
1603            trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1604            trainingError += regTerm;
1605
1606            //Log the loss value
1607            fTrainHistory.AddValue("trainingError",nTrainEpochs,trainingError);
1608
1609            // stop measuring
1610            tend = std::chrono::system_clock::now();
1611
1612            // Compute numerical throughput.
1613            std::chrono::duration<double> elapsed_seconds = tend - tstart;
1614            std::chrono::duration<double> elapsed1 = t1-tstart;
1615            // std::chrono::duration<double> elapsed2 = t2-tstart;
1616            // time to compute training and test errors
1617            std::chrono::duration<double> elapsed_testing = tend-t1;
1618
1619            double seconds = elapsed_seconds.count();
1620            // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1621            // nGFlops *= deepnet.GetNFlops() * 1e-9;
1622            double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1623
            // stop when no improvement for convergenceSteps test intervals or max epochs reached
1624            converged =
1625               convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1626
1627
1628            Log() << std::setw(10) << nTrainEpochs << " | "
1629                  << std::setw(12) << trainingError
1630                  << std::setw(12) << valError
1631                  << std::setw(12) << seconds / settings.testInterval
1632                  << std::setw(12) << elapsed_testing.count()
1633                  << std::setw(12) << 1. / eventTime
1634                  << std::setw(12) << convergenceCount
1635                  << Endl;
1636
1637            if (converged) {
1638               Log() << Endl;
1639            }
1640            tstart = std::chrono::system_clock::now();
1641         }
1642
1643         // if (stepCount % 10 == 0 || converged) {
1644         if (converged && debug) {
1645            Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
1646                  << Endl;
1647            auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1648            auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1649            for (size_t l = 0; l < weights_tensor.size(); ++l)
1650               weights_tensor[l].Print();
1651            bias_tensor[0].Print();
1652         }
1653
1654      }
1655
1656      trainingPhase++;
1657   }  // end loop on training Phase
1658}
1659
1660////////////////////////////////////////////////////////////////////////////////
/// Train the network: dispatch to the architecture-specific TrainDeepNet
/// instantiation (cuDNN or CUDA when Architecture=="GPU", otherwise CPU),
/// selected by the "Architecture" option string and the build configuration.
1662{
1663   if (fInteractive) {
      // interactive (GUI-monitored) training is not supported for MethodDL
1664      Log() << kFATAL << "Not implemented yet" << Endl;
1665      return;
1666   }
1667
1668   // using for training same scalar type defined for the prediction
1669   if (this->GetArchitectureString() == "GPU") {
1670#ifdef R__HAS_TMVAGPU
1671      Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1672#ifdef R__HAS_CUDNN
1673      TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1674#else
1675      TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1676#endif
1677#else
1678      Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1679                         "you have CUDA installed and it was successfully "
1680                         "detected by CMAKE."
1681            << Endl;
1682      return;
1683#endif
1684   } else if (this->GetArchitectureString() == "CPU") {
1685#ifdef R__HAS_TMVACPU
1686      // note that number of threads used for BLAS might be different
1687      // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1688      Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
1689            << gConfig().GetNCpu() << Endl << Endl;
1690#else
1691      Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
1692            << Endl;
1693#endif
1694      TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1695      return;
1696   }
1697   else {
1698      Log() << kFATAL << this->GetArchitectureString() <<
1699                      " is not a supported architecture for TMVA::MethodDL"
1700            << Endl;
1701   }
1702
1703}
1704
1705////////////////////////////////////////////////////////////////////////////////
/// Fill the input tensor fXInput (via the host buffer fXInputBuffer) from the
/// current event's variable values, handling both column-major (HWC) and
/// row-major (CHW) tensor layouts.
1707{
1708   // fill the input tensor fXInput from the current Event data
1709   // with the correct shape depending on the model used
1710   // The input tensor is used for network prediction after training
1711   // using a single event. The network batch size must be equal to 1.
1712   // The architecture specified at compile time in ArchitectureImpl_t
1713   // is used. This should be the CPU architecture
1714
1715   if (!fNet || fNet->GetDepth() == 0) {
1716      Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
1717   }
1718   if (fNet->GetBatchSize() != 1) {
1719      Log() << kFATAL << "FillINputTensor::Network batch size must be equal to 1 when doing single event predicition" << Endl;
1720   }
1721
1722   // get current event
1723   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1724   size_t nVariables = GetEvent()->GetNVariables();
1725
1726   // for Columnlayout tensor memory layout is HWC while for rowwise is CHW
1727   if (fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
1728      R__ASSERT(fXInput.GetShape().size() < 4);
1729      size_t nc, nhw = 0;
1730      if (fXInput.GetShape().size() == 2) {
         // 2D tensor: shape is (batch=1, features)
1731         nc = fXInput.GetShape()[0];
1732         if (nc != 1) {
1733            ArchitectureImpl_t::PrintTensor(fXInput);
1734            Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1" << Endl;
1735         }
1736         nhw = fXInput.GetShape()[1];
1737      } else {
1738         nc = fXInput.GetCSize();
1739         nhw = fXInput.GetWSize();
1740      }
1741      if (nVariables != nc * nhw) {
1742         Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1743               << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw << Endl;
1744      }
1745      for (size_t j = 0; j < nc; j++) {
1746         for (size_t k = 0; k < nhw; k++) {
1747            // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
1748            fXInputBuffer[k * nc + j] = inputValues[j * nhw + k]; // for column layout !!!
1749         }
1750      }
1751   } else {
1752      // row-wise layout
1753      assert(fXInput.GetShape().size() >= 4);
1754      size_t nc = fXInput.GetCSize();
1755      size_t nh = fXInput.GetHSize();
1756      size_t nw = fXInput.GetWSize();
1757      size_t n = nc * nh * nw;
1758      if (nVariables != n) {
1759         Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1760               << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
1761               << Endl;
1762      }
1763      for (size_t j = 0; j < n; j++) {
1764         // in this case TMVA event has same order as input tensor
1765         fXInputBuffer[j] = inputValues[j]; // row-major: straight element-by-element copy
1766      }
1767   }
1768   // copy buffer in input
1769   fXInput.GetDeviceBuffer().CopyFrom(fXInputBuffer);
1770   return;
1771}
1772
1773////////////////////////////////////////////////////////////////////////////////
1774Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1775{
1776
1778
1779 // perform the prediction
1780 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1781
1782 // return value
1783 double mvaValue = (*fYHat)(0, 0);
1784
1785 // for debugging
1786#ifdef DEBUG_MVAVALUE
1787 using Tensor_t = std::vector<MatrixImpl_t>;
1788 TMatrixF xInput(n1,n2, inputValues.data() );
1789 std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1790 xInput.Print();
1791 std::cout << "Output of DeepNet " << mvaValue << std::endl;
1792 auto & deepnet = *fNet;
1793 std::cout << "Loop on layers " << std::endl;
1794 for (int l = 0; l < deepnet.GetDepth(); ++l) {
1795 std::cout << "Layer " << l;
1796 const auto * layer = deepnet.GetLayerAt(l);
1797 const Tensor_t & layer_output = layer->GetOutput();
1798 layer->Print();
1799 std::cout << "DNN output " << layer_output.size() << std::endl;
1800 for (size_t i = 0; i < layer_output.size(); ++i) {
1801#ifdef R__HAS_TMVAGPU
1802 //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1803 TMatrixD m = layer_output[i];
1804#else
1805 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1806#endif
1807 m.Print();
1808 }
1809 const Tensor_t & layer_weights = layer->GetWeights();
1810 std::cout << "DNN weights " << layer_weights.size() << std::endl;
1811 if (layer_weights.size() > 0) {
1812 int i = 0;
1813#ifdef R__HAS_TMVAGPU
1814 TMatrixD m = layer_weights[i];
1815// TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1816#else
1817 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1818#endif
1819 m.Print();
1820 }
1821 }
1822#endif
1823
1824 return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1825}
1826////////////////////////////////////////////////////////////////////////////////
1827/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1828////////////////////////////////////////////////////////////////////////////////
template <typename Architecture_t>
std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
{
   // Evaluate the stored network on events [firstEvt, lastEvt) of the current
   // data set, using the given batch size and the architecture given as the
   // template parameter. A fresh network is built for the requested batch size
   // and the weights are copied over from the trained fNet. Returns one MVA
   // value per event; NaN outputs are mapped to -999.

   // Check whether the model is setup
   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built"
            << Endl;
   }

   // rebuild the networks with the evaluation batch size, reusing the trained
   // network's loss/initialization/regularization settings
   this->SetBatchSize(batchSize);
   size_t inputDepth = this->GetInputDepth();
   size_t inputHeight = this->GetInputHeight();
   size_t inputWidth = this->GetInputWidth();
   size_t batchDepth = this->GetBatchDepth();
   size_t batchHeight = this->GetBatchHeight();
   size_t batchWidth = this->GetBatchWidth();
   ELossFunction J = fNet->GetLossFunction();
   EInitialization I = fNet->GetInitialization();
   ERegularization R = fNet->GetRegularization();
   Double_t weightDecay = fNet->GetWeightDecay();

   using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;

   // create the deep neural network
   DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
   std::vector<DeepNet_t> nets{};
   fBuildNet = false; // the layout strings have already been parsed; just build layers
   CreateDeepNet(deepNet,nets);

   // copy weights from the saved fNet to the built DeepNet
   for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
      deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
      // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
      //    Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
      //    Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
      // }
   }

   // expected batch tensor shape (n0, n1, n2)
   size_t n1 = deepNet.GetBatchHeight();
   size_t n2 = deepNet.GetBatchWidth();
   size_t n0 = deepNet.GetBatchSize();
   // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
   if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
      n1 = deepNet.GetBatchSize();
      n0 = 1;
   }
   //this->SetBatchDepth(n0);
   Long64_t nEvents = lastEvt - firstEvt;
   TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
   TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);


   // Tensor_t xInput;
   // for (size_t i = 0; i < n0; ++i)
   //    xInput.emplace_back(Matrix_t(n1,n2));

   // output matrix receiving the per-batch predictions
   Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;


   // event loop
   // NOTE(review): mvaValues has size (lastEvt - firstEvt) but is indexed with
   // absolute event numbers below — this assumes firstEvt == 0; confirm callers
   std::vector<double> mvaValues(nEvents);


   for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {

      Long64_t ievt_end = ievt + batchSize;
      // case of a full batch fitting in the remaining range
      if (ievt_end <= lastEvt) {

         // on the first batch only, validate that the event variable count
         // matches the network's expected input shape
         if (ievt == firstEvt) {
            Data()->SetCurrentEvent(ievt);
            size_t nVariables = GetEvent()->GetNVariables();

            if (n1 == batchSize && n0 == 1) {
               if (n2 != nVariables) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
                        << Endl;
               }
            } else {
               if (n1*n2 != nVariables || n0 != batchSize) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
                        << Endl;
               }
            }
         }

         auto batch = testData.GetTensorBatch();
         auto inputTensor = batch.GetInput();

         auto xInput = batch.GetInput();
         // make the prediction
         deepNet.Prediction(yHat, xInput, fOutputFunction);
         for (size_t i = 0; i < batchSize; ++i) {
            double value = yHat(i,0);
            mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
         }
      }
      else {
         // case of remaining events (< batchSize): compute prediction by single event !
         for (Long64_t i = ievt; i < lastEvt; ++i) {
            Data()->SetCurrentEvent(i);
            mvaValues[i] = GetMvaValue();
         }
      }
   }

   if (logProgress) {
      Log() << kINFO
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << " " << Endl;
   }

   return mvaValues;
}
1959
1960//////////////////////////////////////////////////////////////////////////
1961/// Get the regression output values for a single event
1962//////////////////////////////////////////////////////////////////////////
1963const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1964{
1965
1966 FillInputTensor ();
1967
1968 // perform the network prediction
1969 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1970
1971 size_t nTargets = DataInfo().GetNTargets();
1972 R__ASSERT(nTargets == fYHat->GetNcols());
1973
1974 std::vector<Float_t> output(nTargets);
1975 for (size_t i = 0; i < nTargets; i++)
1976 output[i] = (*fYHat)(0, i);
1977
1978 // ned to transform back output values
1979 if (fRegressionReturnVal == NULL)
1980 fRegressionReturnVal = new std::vector<Float_t>(nTargets);
1981 R__ASSERT(fRegressionReturnVal->size() == nTargets);
1982
1983 // N.B. one should cache here temporary event class
1984 Event *evT = new Event(*GetEvent());
1985 for (size_t i = 0; i < nTargets; ++i) {
1986 evT->SetTarget(i, output[i]);
1987 }
1988 const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
1989 for (size_t i = 0; i < nTargets; ++i) {
1990 (*fRegressionReturnVal)[i] = evT2->GetTarget(i);
1991 }
1992 delete evT;
1993 return *fRegressionReturnVal;
1994}
1995//////////////////////////////////////////////////////////////////////////
1996/// Get the multi-class output values for a single event
1997//////////////////////////////////////////////////////////////////////////
1998const std::vector<Float_t> &TMVA::MethodDL::GetMulticlassValues()
1999{
2000
2001 FillInputTensor();
2002
2003 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
2004
2005 size_t nClasses = DataInfo().GetNClasses();
2006 R__ASSERT(nClasses == fYHat->GetNcols());
2007
2008 if (fMulticlassReturnVal == NULL) {
2009 fMulticlassReturnVal = new std::vector<Float_t>(nClasses);
2010 }
2011 R__ASSERT(fMulticlassReturnVal->size() == nClasses);
2012
2013 for (size_t i = 0; i < nClasses; i++) {
2014 (*fMulticlassReturnVal)[i] = (*fYHat)(0, i);
2015 }
2016 return *fMulticlassReturnVal;
2017}
2018
2019////////////////////////////////////////////////////////////////////////////////
2020/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
2021/// Here we will evaluate using a default batch size and the same architecture used for
2022/// Training
2023////////////////////////////////////////////////////////////////////////////////
2024std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
2025{
2026
2027 Long64_t nEvents = Data()->GetNEvents();
2028 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2029 if (firstEvt < 0) firstEvt = 0;
2030 nEvents = lastEvt-firstEvt;
2031
2032 // use same batch size as for training (from first strategy)
2033 size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
2034 size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
2035 if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
2036
2037 // using for training same scalar type defined for the prediction
2038 if (this->GetArchitectureString() == "GPU") {
2039#ifdef R__HAS_TMVAGPU
2040 Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
2041#ifdef R__HAS_CUDNN
2042 return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2043#else
2044 return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2045#endif
2046
2047#endif
2048 }
2049 Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
2050 return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
2051}
2052////////////////////////////////////////////////////////////////////////////////
2053void MethodDL::AddWeightsXMLTo(void * parent) const
2054{
2055 // Create the parent XML node with name "Weights"
2056 auto & xmlEngine = gTools().xmlengine();
2057 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2058
2059 /*! Get all necessary information, in order to be able to reconstruct the net
2060 * if we read the same XML file. */
2061
2062 // Deep Net specific info
2063 Int_t depth = fNet->GetDepth();
2064
2065 Int_t inputDepth = fNet->GetInputDepth();
2066 Int_t inputHeight = fNet->GetInputHeight();
2067 Int_t inputWidth = fNet->GetInputWidth();
2068
2069 Int_t batchSize = fNet->GetBatchSize();
2070
2071 Int_t batchDepth = fNet->GetBatchDepth();
2072 Int_t batchHeight = fNet->GetBatchHeight();
2073 Int_t batchWidth = fNet->GetBatchWidth();
2074
2075 char lossFunction = static_cast<char>(fNet->GetLossFunction());
2076 char initialization = static_cast<char>(fNet->GetInitialization());
2077 char regularization = static_cast<char>(fNet->GetRegularization());
2078
2079 Double_t weightDecay = fNet->GetWeightDecay();
2080
2081 // Method specific info (not sure these are needed)
2082 char outputFunction = static_cast<char>(this->GetOutputFunction());
2083 //char lossFunction = static_cast<char>(this->GetLossFunction());
2084
2085 // Add attributes to the parent node
2086 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2087
2088 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2089 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2090 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2091
2092 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2093 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2094 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2095 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2096
2097 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2098 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2099 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2100 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2101
2102 gTools().AddAttr(nn, "WeightDecay", weightDecay);
2103
2104
2105 for (Int_t i = 0; i < depth; i++)
2106 {
2107 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2108 }
2109
2110
2111}
2112
2113////////////////////////////////////////////////////////////////////////////////
2115{
2116
2117 auto netXML = gTools().GetChild(rootXML, "Weights");
2118 if (!netXML){
2119 netXML = rootXML;
2120 }
2121
2122 size_t netDepth;
2123 gTools().ReadAttr(netXML, "NetDepth", netDepth);
2124
2125 size_t inputDepth, inputHeight, inputWidth;
2126 gTools().ReadAttr(netXML, "InputDepth", inputDepth);
2127 gTools().ReadAttr(netXML, "InputHeight", inputHeight);
2128 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
2129
2130 size_t batchSize, batchDepth, batchHeight, batchWidth;
2131 gTools().ReadAttr(netXML, "BatchSize", batchSize);
2132 // use always batchsize = 1
2133 //batchSize = 1;
2134 gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
2135 gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
2136 gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
2137
2138 char lossFunctionChar;
2139 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
2140 char initializationChar;
2141 gTools().ReadAttr(netXML, "Initialization", initializationChar);
2142 char regularizationChar;
2143 gTools().ReadAttr(netXML, "Regularization", regularizationChar);
2144 char outputFunctionChar;
2145 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
2146 double weightDecay;
2147 gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
2148
2149 // create the net
2150
2151 // DeepNetCpu_t is defined in MethodDL.h
2152 this->SetInputDepth(inputDepth);
2153 this->SetInputHeight(inputHeight);
2154 this->SetInputWidth(inputWidth);
2155 this->SetBatchDepth(batchDepth);
2156 this->SetBatchHeight(batchHeight);
2157 this->SetBatchWidth(batchWidth);
2158
2159
2160
2161 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2162 batchHeight, batchWidth,
2163 static_cast<ELossFunction>(lossFunctionChar),
2164 static_cast<EInitialization>(initializationChar),
2165 static_cast<ERegularization>(regularizationChar),
2166 weightDecay));
2167
2168 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
2169
2170
2171 //size_t previousWidth = inputWidth;
2172 auto layerXML = gTools().xmlengine().GetChild(netXML);
2173
2174 // loop on the layer and add them to the network
2175 for (size_t i = 0; i < netDepth; i++) {
2176
2177 TString layerName = gTools().xmlengine().GetNodeName(layerXML);
2178
2179 // case of dense layer
2180 if (layerName == "DenseLayer") {
2181
2182 // read width and activation function and then we can create the layer
2183 size_t width = 0;
2184 gTools().ReadAttr(layerXML, "Width", width);
2185
2186 // Read activation function.
2187 TString funcString;
2188 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2189 EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
2190
2191
2192 fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
2193
2194 }
2195 // Convolutional Layer
2196 else if (layerName == "ConvLayer") {
2197
2198 // read width and activation function and then we can create the layer
2199 size_t depth = 0;
2200 gTools().ReadAttr(layerXML, "Depth", depth);
2201 size_t fltHeight, fltWidth = 0;
2202 size_t strideRows, strideCols = 0;
2203 size_t padHeight, padWidth = 0;
2204 gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
2205 gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
2206 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2207 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2208 gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
2209 gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
2210
2211 // Read activation function.
2212 TString funcString;
2213 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2214 EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
2215
2216
2217 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2218 padHeight, padWidth, actFunction);
2219
2220 }
2221
2222 // MaxPool Layer
2223 else if (layerName == "MaxPoolLayer") {
2224
2225 // read maxpool layer info
2226 size_t filterHeight, filterWidth = 0;
2227 size_t strideRows, strideCols = 0;
2228 gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
2229 gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2230 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2231 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2232
2233 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2234 }
2235 // Reshape Layer
2236 else if (layerName == "ReshapeLayer") {
2237
2238 // read reshape layer info
2239 size_t depth, height, width = 0;
2240 gTools().ReadAttr(layerXML, "Depth", depth);
2241 gTools().ReadAttr(layerXML, "Height", height);
2242 gTools().ReadAttr(layerXML, "Width", width);
2243 int flattening = 0;
2244 gTools().ReadAttr(layerXML, "Flattening",flattening );
2245
2246 fNet->AddReshapeLayer(depth, height, width, flattening);
2247
2248 }
2249 // RNN Layer
2250 else if (layerName == "RNNLayer") {
2251
2252 // read RNN layer info
2253 size_t stateSize,inputSize, timeSteps = 0;
2254 int rememberState= 0;
2255 int returnSequence = 0;
2256 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2257 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2258 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2259 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2260 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2261
2262 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2263
2264 }
2265 // LSTM Layer
2266 else if (layerName == "LSTMLayer") {
2267
2268 // read RNN layer info
2269 size_t stateSize,inputSize, timeSteps = 0;
2270 int rememberState, returnSequence = 0;
2271 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2272 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2273 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2274 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2275 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2276
2277 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2278
2279 }
2280 // GRU Layer
2281 else if (layerName == "GRULayer") {
2282
2283 // read RNN layer info
2284 size_t stateSize,inputSize, timeSteps = 0;
2285 int rememberState, returnSequence, resetGateAfter = 0;
2286 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2287 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2288 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2289 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2290 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2291 gTools().ReadAttr(layerXML, "ResetGateAfter", resetGateAfter);
2292
2293 if (!resetGateAfter && ArchitectureImpl_t::IsCudnn())
2294 Warning("ReadWeightsFromXML",
2295 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2296
2297 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2298 }
2299 // BatchNorm Layer
2300 else if (layerName == "BatchNormLayer") {
2301 // use some dammy value which will be overwrittem in BatchNormLayer::ReadWeightsFromXML
2302 fNet->AddBatchNormLayer(0., 0.0);
2303 }
2304 // read weights and biases
2305 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2306
2307 // read next layer
2308 layerXML = gTools().GetNextChild(layerXML);
2309 }
2310
2311 fBuildNet = false;
2312 // create now the input and output matrices
2313 //int n1 = batchHeight;
2314 //int n2 = batchWidth;
2315 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2316 //if (fXInput.size() > 0) fXInput.clear();
2317 //fXInput.emplace_back(MatrixImpl_t(n1,n2));
2319 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
2320 // make here a ColumnMajor tensor
2321 fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth(),TMVA::Experimental::MemoryLayout::ColumnMajor );
2323
2324 // create pointer to output matrix used for the predictions
2325 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2326
2327
2328}
2329
2330
2331////////////////////////////////////////////////////////////////////////////////
void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
{
   // Reading weights from a plain text stream is not supported for this method;
   // weights are read from XML via ReadWeightsFromXML instead.
}
2335
2336////////////////////////////////////////////////////////////////////////////////
2338{
2339 // TODO
2340 return NULL;
2341}
2342
2343////////////////////////////////////////////////////////////////////////////////
2345{
2346 // TODO
2347}
2348
2349} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define e(i)
Definition RSha256.hxx:103
unsigned int UInt_t
Definition RtypesCore.h:46
const Bool_t kFALSE
Definition RtypesCore.h:101
double Double_t
Definition RtypesCore.h:59
long long Long64_t
Definition RtypesCore.h:80
const Bool_t kTRUE
Definition RtypesCore.h:100
#define ClassImp(name)
Definition Rtypes.h:364
include TDocParser_001 C image html pict1_TDocParser_001 png width
#define R__ASSERT(e)
Definition TError.h:118
int type
Definition TGX11.cxx:121
char * Form(const char *fmt,...)
The Formula class.
Definition TFormula.h:87
Double_t Eval(Double_t x) const
Sets first variable (e.g. x) and evaluate formula.
UInt_t GetNCpu()
Definition Config.h:70
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
MsgLogger & Log() const
Adadelta Optimizer class.
Definition Adadelta.h:45
Adagrad Optimizer class.
Definition Adagrad.h:45
Adam Optimizer class.
Definition Adam.h:45
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition Cpu.h:108
static bool IsCudnn()
Definition Cpu.h:131
Generic Deep Neural Network class.
Definition DeepNet.h:73
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition DeepNet.h:825
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition DeepNet.h:608
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition DeepNet.h:740
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition DeepNet.h:567
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition DeepNet.h:485
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition DeepNet.h:439
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition DeepNet.h:773
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition DeepNet.h:524
Generic layer class.
Definition DenseLayer.h:59
RMSProp Optimizer class.
Definition RMSProp.h:45
Stochastic Batch Gradient Descent Optimizer class.
Definition SGD.h:46
Generic General Layer class.
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Class that contains all the data information.
Definition DataSetInfo.h:62
UInt_t GetNClasses() const
Types::ETreeType GetCurrentType() const
Definition DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:206
void SetCurrentEvent(Long64_t ievt) const
Definition DataSet.h:88
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition Event.cxx:367
UInt_t GetNVariables() const
accessor to the number of variables
Definition Event.cxx:316
UInt_t GetClass() const
Definition Event.h:86
Virtual base Class for all MVA method.
Definition MethodBase.h:111
const char * GetName() const
Definition MethodBase.h:334
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition MethodBase.h:686
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
Definition MethodBase.h:346
const TString & GetMethodName() const
Definition MethodBase.h:331
const Event * GetEvent() const
Definition MethodBase.h:751
DataSetInfo & DataInfo() const
Definition MethodBase.h:410
UInt_t GetNVariables() const
Definition MethodBase.h:345
Types::EAnalysisType fAnalysisType
Definition MethodBase.h:595
UInt_t GetNvar() const
Definition MethodBase.h:344
TrainingHistory fTrainHistory
Definition MethodBase.h:425
DataSet * Data() const
Definition MethodBase.h:409
IPythonInteractive * fInteractive
temporary dataset used when evaluating on a different data (used by MethodCategory::GetMvaValues)
Definition MethodBase.h:448
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition MethodDL.h:108
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition MethodDL.h:183
void GetHelpMessage() const
DNN::ELossFunction fLossFunction
The loss function.
Definition MethodDL.h:190
std::vector< size_t > fInputShape
Contains the batch size (no.
Definition MethodDL.h:178
TString fLayoutString
The string defining the layout of the deep net.
Definition MethodDL.h:194
void SetInputDepth(int inputDepth)
Setters.
Definition MethodDL.h:286
std::unique_ptr< MatrixImpl_t > fYHat
Definition MethodDL.h:208
void Train()
Methods for training the deep learning network.
size_t GetBatchHeight() const
Definition MethodDL.h:263
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class Here we will evaluate...
TString fWeightInitializationString
The string defining the weight initialization method.
Definition MethodDL.h:197
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate max pool layer.
Definition MethodDL.cxx:770
TensorImpl_t fXInput
Definition MethodDL.h:206
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition MethodDL.h:186
virtual const std::vector< Float_t > & GetMulticlassValues()
TString fArchitectureString
The string defining the architecure: CPU or GPU.
Definition MethodDL.h:198
void Init()
default initializations
Definition MethodDL.cxx:432
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
void TrainDeepNet()
train of deep neural network using the defined architecture
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition MethodDL.h:280
DNN::EOutputFunction GetOutputFunction() const
Definition MethodDL.h:269
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate dense layer.
Definition MethodDL.cxx:583
UInt_t GetNumValidationSamples()
parce the validation string and return the number of event data used for validation
TString GetBatchLayoutString() const
Definition MethodDL.h:273
void SetInputWidth(int inputWidth)
Definition MethodDL.h:288
void ProcessOptions()
Definition MethodDL.cxx:219
HostBufferImpl_t fXInputBuffer
Definition MethodDL.h:207
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition MethodDL.h:184
size_t GetInputDepth() const
Definition MethodDL.h:255
std::unique_ptr< DeepNetImpl_t > fNet
Definition MethodDL.h:209
TString GetInputLayoutString() const
Definition MethodDL.h:272
void SetBatchHeight(size_t batchHeight)
Definition MethodDL.h:293
size_t GetInputHeight() const
Definition MethodDL.h:256
TString GetArchitectureString() const
Definition MethodDL.h:278
void ParseBatchLayout()
Parse the input layout.
Definition MethodDL.cxx:482
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition MethodDL.cxx:892
void ReadWeightsFromStream(std::istream &)
void ReadWeightsFromXML(void *wghtnode)
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition MethodDL.h:199
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition MethodDL.h:93
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition MethodDL.h:189
DNN::EInitialization fWeightInitialization
The initialization method.
Definition MethodDL.h:188
size_t GetBatchDepth() const
Definition MethodDL.h:262
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate recurrent (RNN) layer.
Definition MethodDL.cxx:933
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition MethodDL.h:204
size_t GetInputWidth() const
Definition MethodDL.h:257
void SetInputShape(std::vector< size_t > inputShape)
Definition MethodDL.h:289
DNN::ELossFunction GetLossFunction() const
Definition MethodDL.h:270
TString fBatchLayoutString
The string defining the layout of the batch.
Definition MethodDL.h:193
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
Definition MethodDL.cxx:671
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
Definition MethodDL.cxx:831
virtual const std::vector< Float_t > & GetRegressionValues()
TString fTrainingStrategyString
The string defining the training strategy.
Definition MethodDL.h:196
const Ranking * CreateRanking()
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition MethodDL.h:110
void SetBatchDepth(size_t batchDepth)
Definition MethodDL.h:292
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
void SetBatchWidth(size_t batchWidth)
Definition MethodDL.h:294
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
DNN::EInitialization GetWeightInitialization() const
Definition MethodDL.h:268
void SetBatchSize(size_t batchSize)
Definition MethodDL.h:291
TString GetLayoutString() const
Definition MethodDL.h:274
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition MethodDL.h:182
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition MethodDL.h:106
size_t GetBatchWidth() const
Definition MethodDL.h:264
void AddWeightsXMLTo(void *parent) const
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition MethodDL.h:107
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
virtual ~MethodDL()
Virtual Destructor.
void ParseInputLayout()
Parse the input layout.
Definition MethodDL.cxx:439
void FillInputTensor()
Get the input event tensor for evaluation. Internal function to fill the fXInput tensor with the corre...
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition MethodDL.h:201
void SetInputHeight(int inputHeight)
Definition MethodDL.h:287
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options,...
Definition MethodDL.cxx:529
TString fErrorStrategy
The string defining the error strategy for training.
Definition MethodDL.h:195
void DeclareOptions()
The option handling methods.
Definition MethodDL.cxx:167
TString fInputLayoutString
The string defining the layout of the input.
Definition MethodDL.h:192
EMsgType GetMinType() const
Definition MsgLogger.h:69
Ranking for variables in method (implementation)
Definition Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition Timer.cxx:146
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition Tools.cxx:1162
void * GetChild(void *parent, const char *childname=0)
get child node
Definition Tools.cxx:1150
TXMLEngine & xmlengine()
Definition Tools.h:262
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
TString StringFromInt(Long_t i)
string tools
Definition Tools.cxx:1223
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kMulticlass
Definition Types.h:129
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
@ kFATAL
Definition Types.h:61
void Print(Option_t *name="") const
Print the matrix as a table of elements.
TMatrixT.
Definition TMatrixT.h:39
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Definition TNamed.cxx:128
An array of TObjects.
Definition TObjArray.h:31
Collectable string class.
Definition TObjString.h:28
const TString & GetString() const
Definition TObjString.h:46
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition TObject.cxx:949
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition TObject.cxx:963
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition TObject.cxx:622
Basic string class.
Definition TString.h:136
Ssiz_t Length() const
Definition TString.h:410
Int_t Atoi() const
Return integer value of string.
Definition TString.cxx:1946
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition TString.cxx:1131
Double_t Atof() const
Return floating-point value contained in string.
Definition TString.cxx:2012
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition TString.cxx:1816
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition TString.cxx:523
const char * Data() const
Definition TString.h:369
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition TString.h:692
@ kTrailing
Definition TString.h:267
@ kBoth
Definition TString.h:267
void ToUpper()
Change string to upper case.
Definition TString.cxx:1163
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition TString.cxx:2222
Bool_t IsNull() const
Definition TString.h:407
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2336
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
const Int_t n
Definition legend1.C:16
#define I(x, y, z)
EOptimizer
Enum representing the optimizer used for training.
Definition Functions.h:82
EOutputFunction
Enum that represents output functions.
Definition Functions.h:46
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition Functions.h:238
ERegularization
Enum representing the regularization type applied for a given layer.
Definition Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition Functions.h:57
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition DataLoader.h:40
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition MethodDL.cxx:75
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Bool_t IsNaN(Double_t x)
Definition TMath.h:842
Double_t Log(Double_t x)
Definition TMath.h:710
All of the options that can be specified in the training string.
Definition MethodDL.h:72
std::map< TString, double > optimizerParams
Definition MethodDL.h:84
DNN::EOptimizer optimizer
Definition MethodDL.h:78
DNN::ERegularization regularization
Definition MethodDL.h:77
std::vector< Double_t > dropoutProbabilities
Definition MethodDL.h:83
auto * m
Definition textangle.C:8
auto * l
Definition textangle.C:4
auto * t1
Definition textangle.C:20
REAL epsilon
Definition triangle.c:618
static void output(int code)
Definition gifencode.c:226