Logo ROOT  
Reference Guide
MethodDL.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/cnn:$Id$Ndl
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * Web : http://tmva.sourceforge.net *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31#include "TObjString.h"
32
33#include "TMVA/Tools.h"
34#include "TMVA/Configurable.h"
35#include "TMVA/IMethod.h"
37#include "TMVA/MethodDL.h"
38#include "TMVA/Types.h"
40#include "TMVA/DNN/Functions.h"
42#include "TMVA/DNN/SGD.h"
43#include "TMVA/DNN/Adam.h"
44#include "TMVA/DNN/Adagrad.h"
45#include "TMVA/DNN/RMSProp.h"
46#include "TMVA/DNN/Adadelta.h"
47#include "TMVA/Timer.h"
48
49#ifdef R__HAS_TMVAGPU
51#ifdef R__HAS_CUDNN
53#endif
54#endif
55
56#include <chrono>
57
60
61using namespace TMVA::DNN::CNN;
62using namespace TMVA::DNN;
63
69
70
71namespace TMVA {
72
73
74////////////////////////////////////////////////////////////////////////////////
75TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
76{
77 key.ToUpper();
78 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
79 if (it == keyValueMap.end()) {
80 return TString("");
81 }
82 return it->second;
83}
84
85////////////////////////////////////////////////////////////////////////////////
86template <typename T>
87T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
88
89////////////////////////////////////////////////////////////////////////////////
90template <>
91int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
92{
93 TString value(fetchValueTmp(keyValueMap, key));
94 if (value == "") {
95 return defaultValue;
96 }
97 return value.Atoi();
98}
99
100////////////////////////////////////////////////////////////////////////////////
101template <>
102double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
103{
104 TString value(fetchValueTmp(keyValueMap, key));
105 if (value == "") {
106 return defaultValue;
107 }
108 return value.Atof();
109}
110
111////////////////////////////////////////////////////////////////////////////////
112template <>
113TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
114{
115 TString value(fetchValueTmp(keyValueMap, key));
116 if (value == "") {
117 return defaultValue;
118 }
119 return value;
120}
121
122////////////////////////////////////////////////////////////////////////////////
123template <>
124bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
125{
126 TString value(fetchValueTmp(keyValueMap, key));
127 if (value == "") {
128 return defaultValue;
129 }
130
131 value.ToUpper();
132 if (value == "TRUE" || value == "T" || value == "1") {
133 return true;
134 }
135
136 return false;
137}
138
139////////////////////////////////////////////////////////////////////////////////
140template <>
141std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
142 std::vector<double> defaultValue)
143{
144 TString parseString(fetchValueTmp(keyValueMap, key));
145 if (parseString == "") {
146 return defaultValue;
147 }
148
149 parseString.ToUpper();
150 std::vector<double> values;
151
152 const TString tokenDelim("+");
153 TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
154 TIter nextToken(tokenStrings);
155 TObjString *tokenString = (TObjString *)nextToken();
156 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
157 std::stringstream sstr;
158 double currentValue;
159 sstr << tokenString->GetString().Data();
160 sstr >> currentValue;
161 values.push_back(currentValue);
162 }
163 return values;
164}
165
166////////////////////////////////////////////////////////////////////////////////
168{
169 // Set default values for all option strings
170
171 DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
172
173 DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
174
175 DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
176
177 DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
178 " or cross entropy (binary classification).");
179 AddPreDefVal(TString("CROSSENTROPY"));
180 AddPreDefVal(TString("SUMOFSQUARES"));
181 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
182
183 DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
184 AddPreDefVal(TString("XAVIER"));
185 AddPreDefVal(TString("XAVIERUNIFORM"));
186 AddPreDefVal(TString("GAUSS"));
187 AddPreDefVal(TString("UNIFORM"));
188 AddPreDefVal(TString("IDENTITY"));
189 AddPreDefVal(TString("ZERO"));
190
191 DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
192
193 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
194 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
195 "Specify as 100 to use exactly 100 events. (Default: 20%)");
196
197 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
198 AddPreDefVal(TString("STANDARD")); // deprecated and not supported anymore
199 AddPreDefVal(TString("CPU"));
200 AddPreDefVal(TString("GPU"));
201 AddPreDefVal(TString("OPENCL")); // not yet implemented
202 AddPreDefVal(TString("CUDNN")); // not needed (by default GPU is now CUDNN if available)
203
204 // define training strategy separated by a separator "|"
205 DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-3,"
206 "Momentum=0.0,"
207 "ConvergenceSteps=100,"
208 "MaxEpochs=2000,"
209 "Optimizer=ADAM,"
210 "BatchSize=30,"
211 "TestRepetitions=1,"
212 "WeightDecay=0.0,"
213 "Regularization=None,"
214 "DropConfig=0.0",
215 "TrainingStrategy", "Defines the training strategies.");
216}
217
218////////////////////////////////////////////////////////////////////////////////
220{
221
223 Log() << kINFO << "Will ignore negative events in training!" << Endl;
224 }
225
226 if (fArchitectureString == "STANDARD") {
227 Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
228 "Please use Architecture=CPU or Architecture=CPU."
229 "See the TMVA Users' Guide for instructions if you "
230 "encounter problems."
231 << Endl;
232 Log() << kINFO << "We will use instead the CPU architecture" << Endl;
233 fArchitectureString = "CPU";
234 }
235 if (fArchitectureString == "OPENCL") {
236 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
237 "Please use Architecture=CPU or Architecture=CPU for the "
238 "time being. See the TMVA Users' Guide for instructions "
239 "if you encounter problems."
240 << Endl;
241 // use instead GPU
242 Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
243 fArchitectureString = "GPU";
244 }
245
246 // the architecture can now be set at runtime as an option
247
248
249 if (fArchitectureString == "GPU" || fArchitectureString == "CUDNN") {
250#ifdef R__HAS_TMVAGPU
251 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
252#else // case TMVA does not support GPU
253 Log() << kERROR << "CUDA backend not enabled. Please make sure "
254 "you have CUDA installed and it was successfully "
255 "detected by CMAKE by using -Dtmva-gpu=On "
256 << Endl;
257 fArchitectureString = "CPU";
258 Log() << kINFO << "Will now use instead the CPU architecture !" << Endl;
259#endif
260 }
261
262 if (fArchitectureString == "CPU") {
263#ifdef R__HAS_TMVACPU // TMVA has CPU BLAS and IMT support
264 Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support !" << Endl;
265#else // TMVA has no CPU BLAS or IMT support
266 Log() << kINFO << "Multi-core CPU backend not enabled. For better performances, make sure "
267 "you have a BLAS implementation and it was successfully "
268 "detected by CMake as well that the imt CMake flag is set."
269 << Endl;
270 Log() << kINFO << "Will use anyway the CPU architecture but with slower performance" << Endl;
271#endif
272 }
273
274 // Input Layout
277
278 // Loss function and output.
281 if (fErrorStrategy == "SUMOFSQUARES") {
282 fLossFunction = ELossFunction::kMeanSquaredError;
283 }
284 if (fErrorStrategy == "CROSSENTROPY") {
286 }
288 } else if (fAnalysisType == Types::kRegression) {
289 if (fErrorStrategy != "SUMOFSQUARES") {
290 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
291 << " neural net error function. Setting error function to "
292 << " SUMOFSQUARES now." << Endl;
293 }
294
295 fLossFunction = ELossFunction::kMeanSquaredError;
297 } else if (fAnalysisType == Types::kMulticlass) {
298 if (fErrorStrategy == "SUMOFSQUARES") {
299 fLossFunction = ELossFunction::kMeanSquaredError;
300 }
301 if (fErrorStrategy == "CROSSENTROPY") {
303 }
304 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
305 fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
306 }
308 }
309
310 // Initialization
311 // the biases will be always initialized to zero
312 if (fWeightInitializationString == "XAVIER") {
314 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
316 } else if (fWeightInitializationString == "GAUSS") {
318 } else if (fWeightInitializationString == "UNIFORM") {
320 } else if (fWeightInitializationString == "ZERO") {
322 } else if (fWeightInitializationString == "IDENTITY") {
324 } else {
326 }
327
328 // Training settings.
329
331 for (auto &block : strategyKeyValues) {
332 TTrainingSettings settings;
333
334 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
335 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
336 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
337 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
338 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
339 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
340 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
341 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
342
343 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
344 if (regularization == "L1") {
346 } else if (regularization == "L2") {
348 } else {
350 }
351
352 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
353 settings.optimizerName = optimizer;
354 if (optimizer == "SGD") {
356 } else if (optimizer == "ADAM") {
358 } else if (optimizer == "ADAGRAD") {
360 } else if (optimizer == "RMSPROP") {
362 } else if (optimizer == "ADADELTA") {
364 } else {
365 // Make Adam as default choice if the input string is
366 // incorrect.
368 settings.optimizerName = "ADAM";
369 }
370 // check for specific optimizer parameters
371 std::vector<TString> optimParamLabels = {"_beta1", "_beta2", "_eps", "_rho"};
372 //default values
373 std::map<TString, double> defaultValues = {
374 {"ADADELTA_eps", 1.E-8}, {"ADADELTA_rho", 0.95},
375 {"ADAGRAD_eps", 1.E-8},
376 {"ADAM_beta1", 0.9}, {"ADAM_beta2", 0.999}, {"ADAM_eps", 1.E-7},
377 {"RMSPROP_eps", 1.E-7}, {"RMSPROP_rho", 0.9},
378 };
379 for (auto &pN : optimParamLabels) {
380 TString optimParamName = settings.optimizerName + pN;
381 // check if optimizer has default values for this specific parameters
382 if (defaultValues.count(optimParamName) > 0) {
383 double defValue = defaultValues[optimParamName];
384 double val = fetchValueTmp(block, optimParamName, defValue);
385 // create entry in settings for this optimizer parameter
386 settings.optimizerParams[optimParamName] = val;
387 }
388 }
389
390 fTrainingSettings.push_back(settings);
391 }
392
393 // this set fInputShape[0] = batchSize
394 this->SetBatchSize(fTrainingSettings.front().batchSize);
395
396 // case inputlayout and batch layout was not given. Use default then
397 // (1, batchsize, nvariables)
398 // fInputShape[0] -> BatchSize
399 // fInputShape[1] -> InputDepth
400 // fInputShape[2] -> InputHeight
401 // fInputShape[3] -> InputWidth
402 if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
403 fInputShape[1] = 1;
404 fInputShape[2] = 1;
406 }
407 // case when batch layout is not provided (all zero)
408 // batch layout can be determined by the input layout + batch size
409 // case DNN : { 1, B, W }
410 // case CNN : { B, C, H*W}
411 // case RNN : { B, T, H*W }
412
413 if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
414 // case first layer is DENSE
415 if (fInputShape[2] == 1 && fInputShape[1] == 1) {
416 // case of (1, batchsize, input features)
417 fBatchDepth = 1;
418 fBatchHeight = fTrainingSettings.front().batchSize;
420 }
421 else { // more general cases (e.g. for CNN)
422 // case CONV or RNN
423 fBatchDepth = fTrainingSettings.front().batchSize;
426 }
427 }
428}
429
430////////////////////////////////////////////////////////////////////////////////
431/// default initializations
433{
434 // Nothing to do here
435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// Parse the input layout
440{
441 // Define the delimiter
442 const TString delim("|");
443
444 // Get the input layout string
445 TString inputLayoutString = this->GetInputLayoutString();
446
447 // Split the input layout string
448 TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
449 TIter nextInputDim(inputDimStrings);
450 TObjString *inputDimString = (TObjString *)nextInputDim();
451
452 // Go through every token and save its absolute value in the shape array
453 // The first token is the batch size for easy compatibility with cudnn
454 int subDim = 1;
455 std::vector<size_t> inputShape;
456 inputShape.reserve(inputLayoutString.Length()/2 + 2);
457 inputShape.push_back(0); // Will be set later by Trainingsettings, use 0 value now
458 for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
459 // size_t is unsigned
460 subDim = (size_t) abs(inputDimString->GetString().Atoi());
461 // Size among unused dimensions should be set to 1 for cudnn
462 //if (subDim == 0) subDim = 1;
463 inputShape.push_back(subDim);
464 }
465 // it is expected that empty Shape has at least 4 dimensions. We pad the missing one's with 1
466 // for example in case of dense layer input layouts
467 // when we will support 3D convolutions we would need to add extra 1's
468 if (inputShape.size() == 2) {
469 // case of dense layer where only width is specified
470 inputShape.insert(inputShape.begin() + 1, {1,1});
471 }
472 else if (inputShape.size() == 3) {
473 //e.g. case of RNN T,W -> T,1,W
474 inputShape.insert(inputShape.begin() + 2, 1);
475 }
476
477 this->SetInputShape(inputShape);
478}
479
480////////////////////////////////////////////////////////////////////////////////
481/// Parse the input layout
483{
484 // Define the delimiter
485 const TString delim("|");
486
487 // Get the input layout string
488 TString batchLayoutString = this->GetBatchLayoutString();
489
490 size_t batchDepth = 0;
491 size_t batchHeight = 0;
492 size_t batchWidth = 0;
493
494 // Split the input layout string
495 TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
496 TIter nextBatchDim(batchDimStrings);
497 TObjString *batchDimString = (TObjString *)nextBatchDim();
498 int idxToken = 0;
499
500 for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
501 switch (idxToken) {
502 case 0: // input depth
503 {
504 TString strDepth(batchDimString->GetString());
505 batchDepth = (size_t)strDepth.Atoi();
506 } break;
507 case 1: // input height
508 {
509 TString strHeight(batchDimString->GetString());
510 batchHeight = (size_t)strHeight.Atoi();
511 } break;
512 case 2: // input width
513 {
514 TString strWidth(batchDimString->GetString());
515 batchWidth = (size_t)strWidth.Atoi();
516 } break;
517 }
518 ++idxToken;
519 }
520
521 this->SetBatchDepth(batchDepth);
522 this->SetBatchHeight(batchHeight);
523 this->SetBatchWidth(batchWidth);
524}
525
526////////////////////////////////////////////////////////////////////////////////
527/// Create a deep net based on the layout string
528template <typename Architecture_t, typename Layer_t>
531{
532 // Layer specification, layer details
533 const TString layerDelimiter(",");
534 const TString subDelimiter("|");
535
536 TString layoutString = this->GetLayoutString();
537
538 //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
539
540 // Split layers
541 TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
542 TIter nextLayer(layerStrings);
543 TObjString *layerString = (TObjString *)nextLayer();
544
545
546 for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
547
548 // Split layer details
549 TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
550 TIter nextToken(subStrings);
551 TObjString *token = (TObjString *)nextToken();
552
553 // Determine the type of the layer
554 TString strLayerType = token->GetString();
555
556
557 if (strLayerType == "DENSE") {
558 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
559 } else if (strLayerType == "CONV") {
560 ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
561 } else if (strLayerType == "MAXPOOL") {
562 ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
563 } else if (strLayerType == "RESHAPE") {
564 ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
565 } else if (strLayerType == "BNORM") {
566 ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
567 } else if (strLayerType == "RNN") {
568 ParseRecurrentLayer(kLayerRNN, deepNet, nets, layerString->GetString(), subDelimiter);
569 } else if (strLayerType == "LSTM") {
570 ParseRecurrentLayer(kLayerLSTM, deepNet, nets, layerString->GetString(), subDelimiter);
571 } else if (strLayerType == "GRU") {
572 ParseRecurrentLayer(kLayerGRU, deepNet, nets, layerString->GetString(), subDelimiter);
573 } else {
574 // no type of layer specified - assume is dense layer as in old DNN interface
575 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
576 }
577 }
578}
579
580////////////////////////////////////////////////////////////////////////////////
581/// Pases the layer string and creates the appropriate dense layer
582template <typename Architecture_t, typename Layer_t>
584 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
585 TString delim)
586{
587 int width = 0;
589
590 // this return number of input variables for the method
591 // it can be used to deduce width of dense layer if specified as N+10
592 // where N is the number of input variables
593 const size_t inputSize = GetNvar();
594
595 // Split layer details
596 TObjArray *subStrings = layerString.Tokenize(delim);
597 TIter nextToken(subStrings);
598 TObjString *token = (TObjString *)nextToken();
599 int idxToken = 0;
600
601 // loop on the tokens
602 // order of sepcifying width and activation function is not relevant
603 // both 100|TANH and TANH|100 are valid cases
604 for (; token != nullptr; token = (TObjString *)nextToken()) {
605 idxToken++;
606 // try a match with the activation function
607 TString strActFnc(token->GetString());
608 // if first token defines the layer type- skip it
609 if (strActFnc =="DENSE") continue;
610
611 if (strActFnc == "RELU") {
612 activationFunction = DNN::EActivationFunction::kRelu;
613 } else if (strActFnc == "TANH") {
614 activationFunction = DNN::EActivationFunction::kTanh;
615 } else if (strActFnc == "FTANH") {
616 activationFunction = DNN::EActivationFunction::kFastTanh;
617 } else if (strActFnc == "SYMMRELU") {
618 activationFunction = DNN::EActivationFunction::kSymmRelu;
619 } else if (strActFnc == "SOFTSIGN") {
620 activationFunction = DNN::EActivationFunction::kSoftSign;
621 } else if (strActFnc == "SIGMOID") {
622 activationFunction = DNN::EActivationFunction::kSigmoid;
623 } else if (strActFnc == "LINEAR") {
624 activationFunction = DNN::EActivationFunction::kIdentity;
625 } else if (strActFnc == "GAUSS") {
626 activationFunction = DNN::EActivationFunction::kGauss;
627 } else if (width == 0) {
628 // no match found try to parse as text showing the width
629 // support for input a formula where the variable 'x' is 'N' in the string
630 // use TFormula for the evaluation
631 TString strNumNodes = strActFnc;
632 // number of nodes
633 TString strN("x");
634 strNumNodes.ReplaceAll("N", strN);
635 strNumNodes.ReplaceAll("n", strN);
636 TFormula fml("tmp", strNumNodes);
637 width = fml.Eval(inputSize);
638 }
639 }
640 // avoid zero width. assume is last layer and give width = output width
641 // Determine the number of outputs
642 size_t outputSize = 1;
644 outputSize = GetNTargets();
645 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
646 outputSize = DataInfo().GetNClasses();
647 }
648 if (width == 0) width = outputSize;
649
650 // Add the dense layer, initialize the weights and biases and copy
651 TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
652 denseLayer->Initialize();
653
654 // add same layer to fNet
655 if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
656
657 //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
658
659 // add the copy to all slave nets
660 //for (size_t i = 0; i < nets.size(); i++) {
661 // nets[i].AddDenseLayer(copyDenseLayer);
662 //}
663
664 // check compatibility of added layer
665 // for a dense layer input should be 1 x 1 x DxHxW
666}
667
668////////////////////////////////////////////////////////////////////////////////
669/// Pases the layer string and creates the appropriate convolutional layer
670template <typename Architecture_t, typename Layer_t>
672 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
673 TString delim)
674{
675 int depth = 0;
676 int fltHeight = 0;
677 int fltWidth = 0;
678 int strideRows = 0;
679 int strideCols = 0;
680 int zeroPadHeight = 0;
681 int zeroPadWidth = 0;
683
684 // Split layer details
685 TObjArray *subStrings = layerString.Tokenize(delim);
686 TIter nextToken(subStrings);
687 TObjString *token = (TObjString *)nextToken();
688 int idxToken = 0;
689
690 for (; token != nullptr; token = (TObjString *)nextToken()) {
691 switch (idxToken) {
692 case 1: // depth
693 {
694 TString strDepth(token->GetString());
695 depth = strDepth.Atoi();
696 } break;
697 case 2: // filter height
698 {
699 TString strFltHeight(token->GetString());
700 fltHeight = strFltHeight.Atoi();
701 } break;
702 case 3: // filter width
703 {
704 TString strFltWidth(token->GetString());
705 fltWidth = strFltWidth.Atoi();
706 } break;
707 case 4: // stride in rows
708 {
709 TString strStrideRows(token->GetString());
710 strideRows = strStrideRows.Atoi();
711 } break;
712 case 5: // stride in cols
713 {
714 TString strStrideCols(token->GetString());
715 strideCols = strStrideCols.Atoi();
716 } break;
717 case 6: // zero padding height
718 {
719 TString strZeroPadHeight(token->GetString());
720 zeroPadHeight = strZeroPadHeight.Atoi();
721 } break;
722 case 7: // zero padding width
723 {
724 TString strZeroPadWidth(token->GetString());
725 zeroPadWidth = strZeroPadWidth.Atoi();
726 } break;
727 case 8: // activation function
728 {
729 TString strActFnc(token->GetString());
730 if (strActFnc == "RELU") {
731 activationFunction = DNN::EActivationFunction::kRelu;
732 } else if (strActFnc == "TANH") {
733 activationFunction = DNN::EActivationFunction::kTanh;
734 } else if (strActFnc == "SYMMRELU") {
735 activationFunction = DNN::EActivationFunction::kSymmRelu;
736 } else if (strActFnc == "SOFTSIGN") {
737 activationFunction = DNN::EActivationFunction::kSoftSign;
738 } else if (strActFnc == "SIGMOID") {
739 activationFunction = DNN::EActivationFunction::kSigmoid;
740 } else if (strActFnc == "LINEAR") {
741 activationFunction = DNN::EActivationFunction::kIdentity;
742 } else if (strActFnc == "GAUSS") {
743 activationFunction = DNN::EActivationFunction::kGauss;
744 }
745 } break;
746 }
747 ++idxToken;
748 }
749
750 // Add the convolutional layer, initialize the weights and biases and copy
751 TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
752 zeroPadHeight, zeroPadWidth, activationFunction);
753 convLayer->Initialize();
754
755 // Add same layer to fNet
756 if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
757 zeroPadHeight, zeroPadWidth, activationFunction);
758
759 //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
760
761 //// add the copy to all slave nets
762 //for (size_t i = 0; i < nets.size(); i++) {
763 // nets[i].AddConvLayer(copyConvLayer);
764 //}
765}
766
767////////////////////////////////////////////////////////////////////////////////
768/// Pases the layer string and creates the appropriate max pool layer
769template <typename Architecture_t, typename Layer_t>
771 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
772 TString delim)
773{
774
775 int filterHeight = 0;
776 int filterWidth = 0;
777 int strideRows = 0;
778 int strideCols = 0;
779
780 // Split layer details
781 TObjArray *subStrings = layerString.Tokenize(delim);
782 TIter nextToken(subStrings);
783 TObjString *token = (TObjString *)nextToken();
784 int idxToken = 0;
785
786 for (; token != nullptr; token = (TObjString *)nextToken()) {
787 switch (idxToken) {
788 case 1: // filter height
789 {
790 TString strFrmHeight(token->GetString());
791 filterHeight = strFrmHeight.Atoi();
792 } break;
793 case 2: // filter width
794 {
795 TString strFrmWidth(token->GetString());
796 filterWidth = strFrmWidth.Atoi();
797 } break;
798 case 3: // stride in rows
799 {
800 TString strStrideRows(token->GetString());
801 strideRows = strStrideRows.Atoi();
802 } break;
803 case 4: // stride in cols
804 {
805 TString strStrideCols(token->GetString());
806 strideCols = strStrideCols.Atoi();
807 } break;
808 }
809 ++idxToken;
810 }
811
812 // Add the Max pooling layer
813 // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
814 deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
815
816 // Add the same layer to fNet
817 if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
818
819
820 //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
821
822 //// add the copy to all slave nets
823 //for (size_t i = 0; i < nets.size(); i++) {
824 // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
825 //}
826}
827
828////////////////////////////////////////////////////////////////////////////////
829/// Pases the layer string and creates the appropriate reshape layer
830template <typename Architecture_t, typename Layer_t>
832 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
833 TString delim)
834{
835 int depth = 0;
836 int height = 0;
837 int width = 0;
838 bool flattening = false;
839
840 // Split layer details
841 TObjArray *subStrings = layerString.Tokenize(delim);
842 TIter nextToken(subStrings);
843 TObjString *token = (TObjString *)nextToken();
844 int idxToken = 0;
845
846 for (; token != nullptr; token = (TObjString *)nextToken()) {
847 if (token->GetString() == "FLAT") idxToken=4;
848 switch (idxToken) {
849 case 1: {
850 TString strDepth(token->GetString());
851 depth = strDepth.Atoi();
852 } break;
853 case 2: // height
854 {
855 TString strHeight(token->GetString());
856 height = strHeight.Atoi();
857 } break;
858 case 3: // width
859 {
860 TString strWidth(token->GetString());
861 width = strWidth.Atoi();
862 } break;
863 case 4: // flattening
864 {
865 TString flat(token->GetString());
866 if (flat == "FLAT") {
867 flattening = true;
868 }
869 } break;
870 }
871 ++idxToken;
872 }
873
874 // Add the reshape layer
875 // TReshapeLayer<Architecture_t> *reshapeLayer =
876 deepNet.AddReshapeLayer(depth, height, width, flattening);
877
878 // Add the same layer to fNet
879 if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
880
881 //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
882
883 //// add the copy to all slave nets
884 //for (size_t i = 0; i < nets.size(); i++) {
885 // nets[i].AddReshapeLayer(copyReshapeLayer);
886 //}
887}
888
889////////////////////////////////////////////////////////////////////////////////
890/// Pases the layer string and creates the appropriate reshape layer
891template <typename Architecture_t, typename Layer_t>
893 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
894 TString delim)
895{
896
897 // default values
898 double momentum = -1; //0.99;
899 double epsilon = 0.0001;
900
901 // Split layer details
902 TObjArray *subStrings = layerString.Tokenize(delim);
903 TIter nextToken(subStrings);
904 TObjString *token = (TObjString *)nextToken();
905 int idxToken = 0;
906
907 for (; token != nullptr; token = (TObjString *)nextToken()) {
908 switch (idxToken) {
909 case 1: {
910 momentum = std::atof(token->GetString().Data());
911 } break;
912 case 2: // height
913 {
914 epsilon = std::atof(token->GetString().Data());
915 } break;
916 }
917 ++idxToken;
918 }
919
920 // Add the batch norm layer
921 //
922 auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
923 layer->Initialize();
924
925 // Add the same layer to fNet
926 if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
927
928}
929
930////////////////////////////////////////////////////////////////////////////////
931/// Pases the layer string and creates the appropriate rnn layer
932template <typename Architecture_t, typename Layer_t>
934 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
935 TString delim)
936{
937 // int depth = 0;
938 int stateSize = 0;
939 int inputSize = 0;
940 int timeSteps = 0;
941 bool rememberState = false;
942 bool returnSequence = false;
943 bool resetGateAfter = false;
944
945 // Split layer details
946 TObjArray *subStrings = layerString.Tokenize(delim);
947 TIter nextToken(subStrings);
948 TObjString *token = (TObjString *)nextToken();
949 int idxToken = 0;
950
951 for (; token != nullptr; token = (TObjString *)nextToken()) {
952 switch (idxToken) {
953 case 1: // state size
954 {
955 TString strstateSize(token->GetString());
956 stateSize = strstateSize.Atoi();
957 break;
958 }
959 case 2: // input size
960 {
961 TString strinputSize(token->GetString());
962 inputSize = strinputSize.Atoi();
963 break;
964 }
965 case 3: // time steps
966 {
967 TString strtimeSteps(token->GetString());
968 timeSteps = strtimeSteps.Atoi();
969 break;
970 }
971 case 4: // returnSequence (option stateful in Keras)
972 {
973 TString strrememberState(token->GetString());
974 rememberState = (bool) strrememberState.Atoi();
975 break;
976 }
977 case 5: // return full output sequence (1 or 0)
978 {
979 TString str(token->GetString());
980 returnSequence = (bool)str.Atoi();
981 break;
982 }
983 case 6: // resetGate after option (only for GRU)
984 {
985 TString str(token->GetString());
986 resetGateAfter = (bool)str.Atoi();
987 }
988 }
989 ++idxToken;
990 }
991
992 // Add the recurrent layer, initialize the weights and biases and copy
993 if (rnnType == kLayerRNN) {
994 auto * recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
995 recurrentLayer->Initialize();
996 // Add same layer to fNet
997 if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
998 }
999 else if (rnnType == kLayerLSTM ) {
1000 auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1001 recurrentLayer->Initialize();
1002 // Add same layer to fNet
1003 if (fBuildNet)
1004 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1005 }
1006 else if (rnnType == kLayerGRU) {
1007 if (Architecture_t::IsCudnn()) resetGateAfter = true; // needed for Cudnn
1008 auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1009 recurrentLayer->Initialize();
1010 // Add same layer to fNet
1011 if (fBuildNet)
1012 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1013 }
1014 else {
1015 Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
1016 }
1017}
1018
1019////////////////////////////////////////////////////////////////////////////////
1020/// Standard constructor.
1021MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
1022 : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1023 fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1024 fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1025 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1026 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1027 fXInput()
1028{
1029 // Nothing to do here
1030}
1031
1032////////////////////////////////////////////////////////////////////////////////
1033/// Constructor from a weight file.
1034MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
1035 : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1036 fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1037 fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1038 fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1039 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1040 fXInput()
1041{
1042 // Nothing to do here
1043}
1044
1045////////////////////////////////////////////////////////////////////////////////
1046/// Destructor.
1048{
1049 // Nothing to do here
1050}
1051
1052////////////////////////////////////////////////////////////////////////////////
1053/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1054auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1055{
1056 // remove empty spaces
1057 parseString.ReplaceAll(" ","");
1058 KeyValueVector_t blockKeyValues;
1059 const TString keyValueDelim("=");
1060
1061 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1062 TIter nextBlock(blockStrings);
1063 TObjString *blockString = (TObjString *)nextBlock();
1064
1065 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1066 blockKeyValues.push_back(std::map<TString, TString>());
1067 std::map<TString, TString> &currentBlock = blockKeyValues.back();
1068
1069 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1070 TIter nextToken(subStrings);
1071 TObjString *token = (TObjString *)nextToken();
1072
1073 for (; token != nullptr; token = (TObjString *)nextToken()) {
1074 TString strKeyValue(token->GetString());
1075 int delimPos = strKeyValue.First(keyValueDelim.Data());
1076 if (delimPos <= 0) continue;
1077
1078 TString strKey = TString(strKeyValue(0, delimPos));
1079 strKey.ToUpper();
1080 TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1081
1082 strKey.Strip(TString::kBoth, ' ');
1083 strValue.Strip(TString::kBoth, ' ');
1084
1085 currentBlock.insert(std::make_pair(strKey, strValue));
1086 }
1087 }
1088 return blockKeyValues;
1089}
1090
1091////////////////////////////////////////////////////////////////////////////////
1092/// What kind of analysis type can handle the CNN
1094{
1095 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1096 if (type == Types::kMulticlass) return kTRUE;
1097 if (type == Types::kRegression) return kTRUE;
1098
1099 return kFALSE;
1100}
1101
1102////////////////////////////////////////////////////////////////////////////////
1103/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1104/// 100 etc.
1105/// - 20% and 0.2 selects 20% of the training set as validation data.
1106/// - 100 selects 100 events as the validation data.
1107///
1108/// @return number of samples in validation set
1109///
1111{
1112 Int_t nValidationSamples = 0;
1113 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1114
1115 // Parsing + Validation
1116 // --------------------
1117 if (fNumValidationString.EndsWith("%")) {
1118 // Relative spec. format 20%
1119 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1120
1121 if (intValStr.IsFloat()) {
1122 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1123 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1124 } else {
1125 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1126 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1127 }
1128 } else if (fNumValidationString.IsFloat()) {
1129 Double_t valSizeAsDouble = fNumValidationString.Atof();
1130
1131 if (valSizeAsDouble < 1.0) {
1132 // Relative spec. format 0.2
1133 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1134 } else {
1135 // Absolute spec format 100 or 100.0
1136 nValidationSamples = valSizeAsDouble;
1137 }
1138 } else {
1139 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1140 << Endl;
1141 }
1142
1143 // Value validation
1144 // ----------------
1145 if (nValidationSamples < 0) {
1146 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1147 }
1148
1149 if (nValidationSamples == 0) {
1150 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1151 }
1152
1153 if (nValidationSamples >= (Int_t)trainingSetSize) {
1154 Log() << kFATAL << "Validation size \"" << fNumValidationString
1155 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1156 }
1157
1158 return nValidationSamples;
1159}
1160
1161
1162////////////////////////////////////////////////////////////////////////////////
1163/// Implementation of architecture specific train method
1164///
1165template <typename Architecture_t>
1167{
1168
1169 using Scalar_t = typename Architecture_t::Scalar_t;
1172 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1173
1174 bool debug = Log().GetMinType() == kDEBUG;
1175
1176
1177 // set the random seed for weight initialization
1178 Architecture_t::SetRandomSeed(fRandomSeed);
1179
1180 ///split training data in training and validation data
1181 // and determine the number of training and testing examples
1182
1183 size_t nValidationSamples = GetNumValidationSamples();
1184 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1185
1186 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1187 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1188 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1189
1190 size_t trainingPhase = 1;
1191
1192 for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1193
1194 size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1195
1196
1197 // After the processing of the options, initialize the master deep net
1198 size_t batchSize = settings.batchSize;
1199 this->SetBatchSize(batchSize);
1200 // Should be replaced by actual implementation. No support for this now.
1201 size_t inputDepth = this->GetInputDepth();
1202 size_t inputHeight = this->GetInputHeight();
1203 size_t inputWidth = this->GetInputWidth();
1204 size_t batchDepth = this->GetBatchDepth();
1205 size_t batchHeight = this->GetBatchHeight();
1206 size_t batchWidth = this->GetBatchWidth();
1207 ELossFunction J = this->GetLossFunction();
1209 ERegularization R = settings.regularization;
1210 EOptimizer O = settings.optimizer;
1211 Scalar_t weightDecay = settings.weightDecay;
1212
1213 //Batch size should be included in batch layout as well. There are two possibilities:
1214 // 1. Batch depth = batch size one will input tensorsa as (batch_size x d1 x d2)
1215 // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1216 // 2. Batch depth = 1, batch height = batch size batxch width = dim of input features
1217 // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1218
1219 if (batchDepth != batchSize && batchDepth > 1) {
1220 Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1221 return;
1222 }
1223 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1224 Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1225 return;
1226 }
1227
1228
1229 //check also that input layout compatible with batch layout
1230 bool badLayout = false;
1231 // case batch depth == batch size
1232 if (batchDepth == batchSize)
1233 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1234 // case batch Height is batch size
1235 if (batchHeight == batchSize && batchDepth == 1)
1236 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1237 if (badLayout) {
1238 Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1239 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1240 return;
1241 }
1242
1243 // check batch size is compatible with number of events
1244 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1245 Log() << kFATAL << "Number of samples in the datasets are train: ("
1246 << nTrainingSamples << ") test: (" << nValidationSamples
1247 << "). One of these is smaller than the batch size of "
1248 << settings.batchSize << ". Please increase the batch"
1249 << " size to be at least the same size as the smallest"
1250 << " of them." << Endl;
1251 }
1252
1253 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1254
1255 // create a copy of DeepNet for evaluating but with batch size = 1
1256 // fNet is the saved network and will be with CPU or Referrence architecture
1257 if (trainingPhase == 1) {
1258 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1259 batchHeight, batchWidth, J, I, R, weightDecay));
1260 fBuildNet = true;
1261 }
1262 else
1263 fBuildNet = false;
1264
1265 // Initialize the vector of slave nets
1266 std::vector<DeepNet_t> nets{};
1267 nets.reserve(nThreads);
1268 for (size_t i = 0; i < nThreads; i++) {
1269 // create a copies of the master deep net
1270 nets.push_back(deepNet);
1271 }
1272
1273
1274 // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1275 CreateDeepNet(deepNet, nets);
1276
1277
1278 // set droput probabilities
1279 // use convention to store in the layer 1.- dropout probabilities
1280 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1281 for (auto & p : dropoutVector) {
1282 p = 1.0 - p;
1283 }
1284 deepNet.SetDropoutProbabilities(dropoutVector);
1285
1286 if (trainingPhase > 1) {
1287 // copy initial weights from fNet to deepnet
1288 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1289 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1290 }
1291 }
1292
1293 // when fNet is built create also input matrix that will be used to evaluate it
1294 if (fBuildNet) {
1295 //int n1 = batchHeight;
1296 //int n2 = batchWidth;
1297 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1298 //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1299 //fXInput = TensorImpl_t(1,n1,n2);
1301 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1302 fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1303 fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );
1304
1305
1306 // create pointer to output matrix used for the predictions
1307 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1308
1309 // print the created network
1310 Log() << "***** Deep Learning Network *****" << Endl;
1311 if (Log().GetMinType() <= kINFO)
1312 deepNet.Print();
1313 }
1314 Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1315
1316 // Loading the training and validation datasets
1317 TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1318 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1319 {inputDepth, inputHeight, inputWidth},
1320 {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1321 deepNet.GetOutputWidth(), nThreads);
1322
1323 TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1324 TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1325 {inputDepth, inputHeight, inputWidth},
1326 { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1327 deepNet.GetOutputWidth(), nThreads);
1328
1329
1330
1331 // do an evaluation of the network to compute initial minimum test error
1332
1333 Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1334
1335 Double_t minValError = 0.0;
1336 Log() << "Compute initial loss on the validation data " << Endl;
1337 for (auto batch : validationData) {
1338 auto inputTensor = batch.GetInput();
1339 auto outputMatrix = batch.GetOutput();
1340 auto weights = batch.GetWeights();
1341
1342 //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1343 // should we apply droput to the loss ??
1344 minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1345 }
1346 // add Regularization term
1347 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1348 minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1349 minValError += regzTerm;
1350
1351
1352 // create a pointer to base class VOptimizer
1353 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1354
1355 // initialize the base class pointer with the corresponding derived class object.
1356 switch (O) {
1357
1358 case EOptimizer::kSGD:
1359 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1360 new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1361 break;
1362
1363 case EOptimizer::kAdam: {
1364 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1366 deepNet, settings.learningRate, settings.optimizerParams["ADAM_beta1"],
1367 settings.optimizerParams["ADAM_beta2"], settings.optimizerParams["ADAM_eps"]));
1368 break;
1369 }
1370
1371 case EOptimizer::kAdagrad:
1372 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1373 new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1374 settings.optimizerParams["ADAGRAD_eps"]));
1375 break;
1376
1377 case EOptimizer::kRMSProp:
1378 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1379 new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum,
1380 settings.optimizerParams["RMSPROP_rho"],
1381 settings.optimizerParams["RMSPROP_eps"]));
1382 break;
1383
1384 case EOptimizer::kAdadelta:
1385 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1386 new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1387 settings.optimizerParams["ADADELTA_rho"],
1388 settings.optimizerParams["ADADELTA_eps"]));
1389 break;
1390 }
1391
1392
1393 // Initialize the vector of batches, one batch for one slave network
1394 std::vector<TTensorBatch<Architecture_t>> batches{};
1395
1396 bool converged = false;
1397 size_t convergenceCount = 0;
1398 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1399
1400 // start measuring
1401 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1402 tstart = std::chrono::system_clock::now();
1403
1404 // function building string with optimizer parameters values for logging
1405 auto optimParametersString = [&]() {
1406 TString optimParameters;
1407 for ( auto & element : settings.optimizerParams) {
1408 TString key = element.first;
1409 key.ReplaceAll(settings.optimizerName + "_", ""); // strip optimizerName_
1410 double value = element.second;
1411 if (!optimParameters.IsNull())
1412 optimParameters += ",";
1413 else
1414 optimParameters += " (";
1415 optimParameters += TString::Format("%s=%g", key.Data(), value);
1416 }
1417 if (!optimParameters.IsNull())
1418 optimParameters += ")";
1419 return optimParameters;
1420 };
1421
1422 Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1423 << " Optimizer " << settings.optimizerName
1424 << optimParametersString()
1425 << " Learning rate = " << settings.learningRate << " regularization " << (char)settings.regularization
1426 << " minimum error = " << minValError << Endl;
1427 if (!fInteractive) {
1428 std::string separator(62, '-');
1429 Log() << separator << Endl;
1430 Log() << std::setw(10) << "Epoch"
1431 << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err." << std::setw(12)
1432 << "t(s)/epoch" << std::setw(12) << "t(s)/Loss" << std::setw(12) << "nEvents/s" << std::setw(12)
1433 << "Conv. Steps" << Endl;
1434 Log() << separator << Endl;
1435 }
1436
1437 // set up generator for shuffling the batches
1438 // if seed is zero we have always a different order in the batches
1439 size_t shuffleSeed = 0;
1440 if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1441 RandomGenerator<TRandom3> rng(shuffleSeed);
1442
1443 // print weights before
1444 if (fBuildNet && debug) {
1445 Log() << "Initial Deep Net Weights " << Endl;
1446 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1447 for (size_t l = 0; l < weights_tensor.size(); ++l)
1448 weights_tensor[l].Print();
1449 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1450 bias_tensor[0].Print();
1451 }
1452
1453 Log() << " Start epoch iteration ..." << Endl;
1454 bool debugFirstEpoch = false;
1455 bool computeLossInTraining = true; // compute loss in training or at test time
1456 size_t nTrainEpochs = 0;
1457 while (!converged) {
1458 nTrainEpochs++;
1459 trainingData.Shuffle(rng);
1460
1461 // execute all epochs
1462 //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1463
1464 Double_t trainingError = 0;
1465 for (size_t i = 0; i < batchesInEpoch; ++i ) {
1466 // Clean and load new batches, one batch for one slave net
1467 //batches.clear();
1468 //batches.reserve(nThreads);
1469 //for (size_t j = 0; j < nThreads; j++) {
1470 // batches.push_back(trainingData.GetTensorBatch());
1471 //}
1472 if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1473
1474 auto my_batch = trainingData.GetTensorBatch();
1475
1476 if (debugFirstEpoch)
1477 std::cout << "got batch data - doing forward \n";
1478
1479#ifdef DEBUG
1480
1481 Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1482 typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1483 typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1484 Architecture_t::PrintTensor(tOut,"label tensor",true) ;
1485 Architecture_t::PrintTensor(tW,"weight tensor",true) ;
1486#endif
1487
1488 deepNet.Forward(my_batch.GetInput(), true);
1489 // compute also loss
1490 if (computeLossInTraining) {
1491 auto outputMatrix = my_batch.GetOutput();
1492 auto weights = my_batch.GetWeights();
1493 trainingError += deepNet.Loss(outputMatrix, weights, false);
1494 }
1495
1496 if (debugFirstEpoch)
1497 std::cout << "- doing backward \n";
1498
1499#ifdef DEBUG
1500 size_t nlayers = deepNet.GetLayers().size();
1501 for (size_t l = 0; l < nlayers; ++l) {
1502 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1503 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1504 TString::Format("initial weights layer %d", l).Data());
1505
1506 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1507 TString::Format("output tensor layer %d", l).Data());
1508 }
1509#endif
1510
1511 //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1512
1513 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1514
1515 if (debugFirstEpoch)
1516 std::cout << "- doing optimizer update \n";
1517
1518 // increment optimizer step that is used in some algorithms (e.g. ADAM)
1519 optimizer->IncrementGlobalStep();
1520 optimizer->Step();
1521
1522#ifdef DEBUG
1523 std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1524 for (size_t l = 0; l < nlayers; ++l) {
1525 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1526 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1527 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1528 }
1529 }
1530#endif
1531
1532 }
1533
1534 if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
1535 //}
1536 debugFirstEpoch = false;
1537 if ((nTrainEpochs % settings.testInterval) == 0) {
1538
1539 std::chrono::time_point<std::chrono::system_clock> t1,t2;
1540
1541 t1 = std::chrono::system_clock::now();
1542
1543 // Compute validation error.
1544
1545
1546 Double_t valError = 0.0;
1547 bool inTraining = false;
1548 for (auto batch : validationData) {
1549 auto inputTensor = batch.GetInput();
1550 auto outputMatrix = batch.GetOutput();
1551 auto weights = batch.GetWeights();
1552 // should we apply droput to the loss ??
1553 valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1554 }
1555 // normalize loss to number of batches and add regularization term
1556 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1557 valError /= (Double_t)(nValidationSamples / settings.batchSize);
1558 valError += regTerm;
1559
1560 //Log the loss value
1561 fTrainHistory.AddValue("valError",nTrainEpochs,valError);
1562
1563 t2 = std::chrono::system_clock::now();
1564
1565 // checking for convergence
1566 if (valError < minValError) {
1567 convergenceCount = 0;
1568 } else {
1569 convergenceCount += settings.testInterval;
1570 }
1571
1572 // copy configuration when reached a minimum error
1573 if (valError < minValError ) {
1574 // Copy weights from deepNet to fNet
1575 Log() << std::setw(10) << nTrainEpochs
1576 << " Minimum Test error found - save the configuration " << Endl;
1577 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1578 fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1579 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1580 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1581 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1582 // }
1583 }
1584 // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1585 // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1586
1587 minValError = valError;
1588 }
1589 else if ( minValError <= 0. )
1590 minValError = valError;
1591
1592 if (!computeLossInTraining) {
1593 trainingError = 0.0;
1594 // Compute training error.
1595 for (auto batch : trainingData) {
1596 auto inputTensor = batch.GetInput();
1597 auto outputMatrix = batch.GetOutput();
1598 auto weights = batch.GetWeights();
1599 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1600 }
1601 }
1602 // normalize loss to number of batches and add regularization term
1603 trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1604 trainingError += regTerm;
1605
1606 //Log the loss value
1607 fTrainHistory.AddValue("trainingError",nTrainEpochs,trainingError);
1608
1609 // stop measuring
1610 tend = std::chrono::system_clock::now();
1611
1612 // Compute numerical throughput.
1613 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1614 std::chrono::duration<double> elapsed1 = t1-tstart;
1615 // std::chrono::duration<double> elapsed2 = t2-tstart;
1616 // time to compute training and test errors
1617 std::chrono::duration<double> elapsed_testing = tend-t1;
1618
1619 double seconds = elapsed_seconds.count();
1620 // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1621 // nGFlops *= deepnet.GetNFlops() * 1e-9;
1622 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1623
1624 converged =
1625 convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1626
1627
1628 Log() << std::setw(10) << nTrainEpochs << " | "
1629 << std::setw(12) << trainingError
1630 << std::setw(12) << valError
1631 << std::setw(12) << seconds / settings.testInterval
1632 << std::setw(12) << elapsed_testing.count()
1633 << std::setw(12) << 1. / eventTime
1634 << std::setw(12) << convergenceCount
1635 << Endl;
1636
1637 if (converged) {
1638 Log() << Endl;
1639 }
1640 tstart = std::chrono::system_clock::now();
1641 }
1642
1643 // if (stepCount % 10 == 0 || converged) {
1644 if (converged && debug) {
1645 Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
1646 << Endl;
1647 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1648 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1649 for (size_t l = 0; l < weights_tensor.size(); ++l)
1650 weights_tensor[l].Print();
1651 bias_tensor[0].Print();
1652 }
1653
1654 }
1655
1656 trainingPhase++;
1657 } // end loop on training Phase
1658}
1659
1660////////////////////////////////////////////////////////////////////////////////
1662{
1663 if (fInteractive) {
1664 Log() << kFATAL << "Not implemented yet" << Endl;
1665 return;
1666 }
1667
1668 // using for training same scalar type defined for the prediction
1669 if (this->GetArchitectureString() == "GPU") {
1670#ifdef R__HAS_TMVAGPU
1671 Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1672#ifdef R__HAS_CUDNN
1673 TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1674#else
1675 TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1676#endif
1677#else
1678 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1679 "you have CUDA installed and it was successfully "
1680 "detected by CMAKE."
1681 << Endl;
1682 return;
1683#endif
1684 } else if (this->GetArchitectureString() == "CPU") {
1685#ifdef R__HAS_TMVACPU
1686 // note that number of threads used for BLAS might be different
1687 // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1688 Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
1689 << gConfig().GetNCpu() << Endl << Endl;
1690#else
1691 Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
1692 << Endl;
1693#endif
1694 TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1695 return;
1696 }
1697 else {
1698 Log() << kFATAL << this->GetArchitectureString() <<
1699 " is not a supported architecture for TMVA::MethodDL"
1700 << Endl;
1701 }
1702
1703}
1704
1705
1706////////////////////////////////////////////////////////////////////////////////
1707Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1708{
1709
1710 // note that fNet should have been build with a batch size of 1
1711
1712 if (!fNet || fNet->GetDepth() == 0) {
1713 Log() << kFATAL << "The network has not been trained and fNet is not built"
1714 << Endl;
1715 }
1716
1717 // input size must be equal to 1 which is the batch size of fNet
1718 R__ASSERT(fNet->GetBatchSize() == 1);
1719
1720 // int batchWidth = fNet->GetBatchWidth();
1721 // int batchDepth = fNet->GetBatchDepth();
1722 // int batchHeight = fNet->GetBatchHeight();
1723// int noutput = fNet->GetOutputWidth();
1724
1725
1726 // get current event
1727 const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1728
1729 size_t nVariables = GetEvent()->GetNVariables();
1730
1731 // for Columnlayout tensor memory layout is HWC while for rowwise is CHW
1733 R__ASSERT(fXInput.GetShape().size() < 4);
1734 size_t nc, nhw = 0;
1735 if (fXInput.GetShape().size() == 2) {
1736 nc = fXInput.GetShape()[0];
1737 if (nc != 1 ) {
1739 Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1"
1740 << Endl;
1741 }
1742 nhw = fXInput.GetShape()[1];
1743 } else {
1744 nc = fXInput.GetCSize();
1745 nhw = fXInput.GetWSize();
1746 }
1747 if ( nVariables != nc * nhw) {
1748 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1749 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw
1750 << Endl;
1751 }
1752 for (size_t j = 0; j < nc; j++) {
1753 for (size_t k = 0; k < nhw; k++) {
1754 // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
1755 fXInputBuffer[ k * nc + j] = inputValues[j*nhw + k]; // for column layout !!!
1756 }
1757 }
1758 } else {
1759 // row-wise layout
1760 assert(fXInput.GetShape().size() >= 4);
1761 size_t nc = fXInput.GetCSize();
1762 size_t nh = fXInput.GetHSize();
1763 size_t nw = fXInput.GetWSize();
1764 size_t n = nc * nh * nw;
1765 if ( nVariables != n) {
1766 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1767 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
1768 << Endl;
1769 }
1770 for (size_t j = 0; j < n; j++) {
1771 // in this case TMVA event has same order as input tensor
1772 fXInputBuffer[ j ] = inputValues[j]; // for column layout !!!
1773 }
1774 }
1775 // copy buffer in input
1776 fXInput.GetDeviceBuffer().CopyFrom( fXInputBuffer);
1777
1778 // perform the prediction
1779 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1780
1781 // return value
1782 double mvaValue = (*fYHat)(0, 0);
1783
1784 // for debugging
1785#ifdef DEBUG_MVAVALUE
1786 using Tensor_t = std::vector<MatrixImpl_t>;
1787 TMatrixF xInput(n1,n2, inputValues.data() );
1788 std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1789 xInput.Print();
1790 std::cout << "Output of DeepNet " << mvaValue << std::endl;
1791 auto & deepnet = *fNet;
1792 std::cout << "Loop on layers " << std::endl;
1793 for (int l = 0; l < deepnet.GetDepth(); ++l) {
1794 std::cout << "Layer " << l;
1795 const auto * layer = deepnet.GetLayerAt(l);
1796 const Tensor_t & layer_output = layer->GetOutput();
1797 layer->Print();
1798 std::cout << "DNN output " << layer_output.size() << std::endl;
1799 for (size_t i = 0; i < layer_output.size(); ++i) {
1800#ifdef R__HAS_TMVAGPU
1801 //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1802 TMatrixD m = layer_output[i];
1803#else
1804 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1805#endif
1806 m.Print();
1807 }
1808 const Tensor_t & layer_weights = layer->GetWeights();
1809 std::cout << "DNN weights " << layer_weights.size() << std::endl;
1810 if (layer_weights.size() > 0) {
1811 int i = 0;
1812#ifdef R__HAS_TMVAGPU
1813 TMatrixD m = layer_weights[i];
1814// TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1815#else
1816 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1817#endif
1818 m.Print();
1819 }
1820 }
1821#endif
1822
1823 return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1824}
1825////////////////////////////////////////////////////////////////////////////////
1826/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1827////////////////////////////////////////////////////////////////////////////////
1828template <typename Architecture_t>
1829std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1830{
1831
1832 // Check whether the model is setup
1833 if (!fNet || fNet->GetDepth() == 0) {
1834 Log() << kFATAL << "The network has not been trained and fNet is not built"
1835 << Endl;
1836 }
1837
1838 // rebuild the networks
1839 this->SetBatchSize(batchSize);
1840 size_t inputDepth = this->GetInputDepth();
1841 size_t inputHeight = this->GetInputHeight();
1842 size_t inputWidth = this->GetInputWidth();
1843 size_t batchDepth = this->GetBatchDepth();
1844 size_t batchHeight = this->GetBatchHeight();
1845 size_t batchWidth = this->GetBatchWidth();
1846 ELossFunction J = fNet->GetLossFunction();
1847 EInitialization I = fNet->GetInitialization();
1848 ERegularization R = fNet->GetRegularization();
1849 Double_t weightDecay = fNet->GetWeightDecay();
1850
1851 using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1852 using Matrix_t = typename Architecture_t::Matrix_t;
1853 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1854
1855 // create the deep neural network
1856 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1857 std::vector<DeepNet_t> nets{};
1858 fBuildNet = false;
1859 CreateDeepNet(deepNet,nets);
1860
1861 // copy weights from the saved fNet to the built DeepNet
1862 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1863 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1864 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1865 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
1866 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
1867 // }
1868 }
1869
1870 size_t n1 = deepNet.GetBatchHeight();
1871 size_t n2 = deepNet.GetBatchWidth();
1872 size_t n0 = deepNet.GetBatchSize();
1873 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1874 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1875 n1 = deepNet.GetBatchSize();
1876 n0 = 1;
1877 }
1878 //this->SetBatchDepth(n0);
1879 Long64_t nEvents = lastEvt - firstEvt;
1880 TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1881 TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1882
1883
1884 // Tensor_t xInput;
1885 // for (size_t i = 0; i < n0; ++i)
1886 // xInput.emplace_back(Matrix_t(n1,n2));
1887
1888 // create pointer to output matrix used for the predictions
1889 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1890
1891 // use timer
1892 Timer timer( nEvents, GetName(), kTRUE );
1893
1894 if (logProgress)
1895 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1896 << "Evaluation of " << GetMethodName() << " on "
1897 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1898 << " sample (" << nEvents << " events)" << Endl;
1899
1900
1901 // eventg loop
1902 std::vector<double> mvaValues(nEvents);
1903
1904
1905 for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1906
1907 Long64_t ievt_end = ievt + batchSize;
1908 // case of batch prediction for
1909 if (ievt_end <= lastEvt) {
1910
1911 if (ievt == firstEvt) {
1912 Data()->SetCurrentEvent(ievt);
1913 size_t nVariables = GetEvent()->GetNVariables();
1914
1915 if (n1 == batchSize && n0 == 1) {
1916 if (n2 != nVariables) {
1917 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1918 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1919 << Endl;
1920 }
1921 } else {
1922 if (n1*n2 != nVariables || n0 != batchSize) {
1923 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1924 << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1925 << Endl;
1926 }
1927 }
1928 }
1929
1930 auto batch = testData.GetTensorBatch();
1931 auto inputTensor = batch.GetInput();
1932
1933 auto xInput = batch.GetInput();
1934 // make the prediction
1935 deepNet.Prediction(yHat, xInput, fOutputFunction);
1936 for (size_t i = 0; i < batchSize; ++i) {
1937 double value = yHat(i,0);
1938 mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1939 }
1940 }
1941 else {
1942 // case of remaining events: compute prediction by single event !
1943 for (Long64_t i = ievt; i < lastEvt; ++i) {
1944 Data()->SetCurrentEvent(i);
1945 mvaValues[i] = GetMvaValue();
1946 }
1947 }
1948 }
1949
1950 if (logProgress) {
1951 Log() << kINFO
1952 << "Elapsed time for evaluation of " << nEvents << " events: "
1953 << timer.GetElapsedTime() << " " << Endl;
1954 }
1955
1956 return mvaValues;
1957}
1958
1959const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1960{
1961 size_t nVariables = GetEvent()->GetNVariables();
1962 MatrixImpl_t X(1, nVariables);
1963 TensorImpl_t X_vec ( 1, 1, nVariables); // needs to be really 1
1964 const Event *ev = GetEvent();
1965 const std::vector<Float_t>& inputValues = ev->GetValues();
1966 for (size_t i = 0; i < nVariables; i++) {
1967 X_vec(0,i,0) = inputValues[i]; // in case of column format !!
1968 }
1969 //X_vec.emplace_back(X);
1970
1971 size_t nTargets = std::max(1u, ev->GetNTargets());
1972 MatrixImpl_t YHat(1, nTargets);
1973 std::vector<Float_t> output(nTargets);
1974 fNet->Prediction(YHat, X_vec, fOutputFunction);
1975
1976 for (size_t i = 0; i < nTargets; i++)
1977 output[i] = YHat(0, i);
1978
1979 if (fRegressionReturnVal == NULL) {
1980 fRegressionReturnVal = new std::vector<Float_t>();
1981 }
1982 fRegressionReturnVal->clear();
1983
1984 Event * evT = new Event(*ev);
1985 for (size_t i = 0; i < nTargets; ++i) {
1986 evT->SetTarget(i, output[i]);
1987 }
1988
1989 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1990 for (size_t i = 0; i < nTargets; ++i) {
1991 fRegressionReturnVal->push_back(evT2->GetTarget(i));
1992 }
1993 delete evT;
1994 return *fRegressionReturnVal;
1995}
1996
1997const std::vector<Float_t> & TMVA::MethodDL::GetMulticlassValues()
1998{
1999 size_t nVariables = GetEvent()->GetNVariables();
2000 MatrixImpl_t X(1, nVariables);
2001 TensorImpl_t X_vec ( 1, 1, nVariables);
2002 MatrixImpl_t YHat(1, DataInfo().GetNClasses());
2003 if (fMulticlassReturnVal == NULL) {
2004 fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
2005 }
2006
2007 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
2008 for (size_t i = 0; i < nVariables; i++) {
2009 X_vec(0,i, 0) = inputValues[i];
2010 }
2011 //X_vec.emplace_back(X);
2012 fNet->Prediction(YHat, X_vec, fOutputFunction);
2013 for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
2014 (*fMulticlassReturnVal)[i] = YHat(0, i);
2015 }
2016 return *fMulticlassReturnVal;
2017}
2018
2019
2020////////////////////////////////////////////////////////////////////////////////
2021/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
2022////////////////////////////////////////////////////////////////////////////////
2023std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
2024{
2025 // Long64_t nEvents = Data()->GetNEvents();
2026 // std::vector<Double_t> v(nEvents);
2027 // for (Long64_t i = 0; i < nEvents; ++i) {
2028 // Data()->SetCurrentEvent(i);
2029 // v[i] = GetMvaValue();
2030 // }
2031 // return v;
2032
2033
2034 Long64_t nEvents = Data()->GetNEvents();
2035 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2036 if (firstEvt < 0) firstEvt = 0;
2037 nEvents = lastEvt-firstEvt;
2038
2039 // use same batch size as for training (from first strategy)
2040 size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
2041 size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
2042 if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
2043
2044 // using for training same scalar type defined for the prediction
2045 if (this->GetArchitectureString() == "GPU") {
2046#ifdef R__HAS_TMVAGPU
2047 Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
2048#ifdef R__HAS_CUDNN
2049 return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2050#else
2051 return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2052#endif
2053
2054#endif
2055 }
2056 Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
2057 return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
2058}
2059////////////////////////////////////////////////////////////////////////////////
2060void MethodDL::AddWeightsXMLTo(void * parent) const
2061{
2062 // Create the parent XML node with name "Weights"
2063 auto & xmlEngine = gTools().xmlengine();
2064 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2065
2066 /*! Get all necessary information, in order to be able to reconstruct the net
2067 * if we read the same XML file. */
2068
2069 // Deep Net specific info
2070 Int_t depth = fNet->GetDepth();
2071
2072 Int_t inputDepth = fNet->GetInputDepth();
2073 Int_t inputHeight = fNet->GetInputHeight();
2074 Int_t inputWidth = fNet->GetInputWidth();
2075
2076 Int_t batchSize = fNet->GetBatchSize();
2077
2078 Int_t batchDepth = fNet->GetBatchDepth();
2079 Int_t batchHeight = fNet->GetBatchHeight();
2080 Int_t batchWidth = fNet->GetBatchWidth();
2081
2082 char lossFunction = static_cast<char>(fNet->GetLossFunction());
2083 char initialization = static_cast<char>(fNet->GetInitialization());
2084 char regularization = static_cast<char>(fNet->GetRegularization());
2085
2086 Double_t weightDecay = fNet->GetWeightDecay();
2087
2088 // Method specific info (not sure these are needed)
2089 char outputFunction = static_cast<char>(this->GetOutputFunction());
2090 //char lossFunction = static_cast<char>(this->GetLossFunction());
2091
2092 // Add attributes to the parent node
2093 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2094
2095 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2096 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2097 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2098
2099 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2100 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2101 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2102 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2103
2104 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2105 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2106 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2107 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2108
2109 gTools().AddAttr(nn, "WeightDecay", weightDecay);
2110
2111
2112 for (Int_t i = 0; i < depth; i++)
2113 {
2114 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2115 }
2116
2117
2118}
2119
2120////////////////////////////////////////////////////////////////////////////////
2122{
2123
2124 auto netXML = gTools().GetChild(rootXML, "Weights");
2125 if (!netXML){
2126 netXML = rootXML;
2127 }
2128
2129 size_t netDepth;
2130 gTools().ReadAttr(netXML, "NetDepth", netDepth);
2131
2132 size_t inputDepth, inputHeight, inputWidth;
2133 gTools().ReadAttr(netXML, "InputDepth", inputDepth);
2134 gTools().ReadAttr(netXML, "InputHeight", inputHeight);
2135 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
2136
2137 size_t batchSize, batchDepth, batchHeight, batchWidth;
2138 gTools().ReadAttr(netXML, "BatchSize", batchSize);
2139 // use always batchsize = 1
2140 //batchSize = 1;
2141 gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
2142 gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
2143 gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
2144
2145 char lossFunctionChar;
2146 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
2147 char initializationChar;
2148 gTools().ReadAttr(netXML, "Initialization", initializationChar);
2149 char regularizationChar;
2150 gTools().ReadAttr(netXML, "Regularization", regularizationChar);
2151 char outputFunctionChar;
2152 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
2153 double weightDecay;
2154 gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
2155
2156 // create the net
2157
2158 // DeepNetCpu_t is defined in MethodDL.h
2159 this->SetInputDepth(inputDepth);
2160 this->SetInputHeight(inputHeight);
2161 this->SetInputWidth(inputWidth);
2162 this->SetBatchDepth(batchDepth);
2163 this->SetBatchHeight(batchHeight);
2164 this->SetBatchWidth(batchWidth);
2165
2166
2167
2168 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2169 batchHeight, batchWidth,
2170 static_cast<ELossFunction>(lossFunctionChar),
2171 static_cast<EInitialization>(initializationChar),
2172 static_cast<ERegularization>(regularizationChar),
2173 weightDecay));
2174
2175 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
2176
2177
2178 //size_t previousWidth = inputWidth;
2179 auto layerXML = gTools().xmlengine().GetChild(netXML);
2180
2181 // loop on the layer and add them to the network
2182 for (size_t i = 0; i < netDepth; i++) {
2183
2184 TString layerName = gTools().xmlengine().GetNodeName(layerXML);
2185
2186 // case of dense layer
2187 if (layerName == "DenseLayer") {
2188
2189 // read width and activation function and then we can create the layer
2190 size_t width = 0;
2191 gTools().ReadAttr(layerXML, "Width", width);
2192
2193 // Read activation function.
2194 TString funcString;
2195 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2196 EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
2197
2198
2199 fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
2200
2201 }
2202 // Convolutional Layer
2203 else if (layerName == "ConvLayer") {
2204
2205 // read width and activation function and then we can create the layer
2206 size_t depth = 0;
2207 gTools().ReadAttr(layerXML, "Depth", depth);
2208 size_t fltHeight, fltWidth = 0;
2209 size_t strideRows, strideCols = 0;
2210 size_t padHeight, padWidth = 0;
2211 gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
2212 gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
2213 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2214 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2215 gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
2216 gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
2217
2218 // Read activation function.
2219 TString funcString;
2220 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2221 EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
2222
2223
2224 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2225 padHeight, padWidth, actFunction);
2226
2227 }
2228
2229 // MaxPool Layer
2230 else if (layerName == "MaxPoolLayer") {
2231
2232 // read maxpool layer info
2233 size_t filterHeight, filterWidth = 0;
2234 size_t strideRows, strideCols = 0;
2235 gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
2236 gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2237 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2238 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2239
2240 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2241 }
2242 // Reshape Layer
2243 else if (layerName == "ReshapeLayer") {
2244
2245 // read reshape layer info
2246 size_t depth, height, width = 0;
2247 gTools().ReadAttr(layerXML, "Depth", depth);
2248 gTools().ReadAttr(layerXML, "Height", height);
2249 gTools().ReadAttr(layerXML, "Width", width);
2250 int flattening = 0;
2251 gTools().ReadAttr(layerXML, "Flattening",flattening );
2252
2253 fNet->AddReshapeLayer(depth, height, width, flattening);
2254
2255 }
2256 // RNN Layer
2257 else if (layerName == "RNNLayer") {
2258
2259 // read RNN layer info
2260 size_t stateSize,inputSize, timeSteps = 0;
2261 int rememberState= 0;
2262 int returnSequence = 0;
2263 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2264 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2265 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2266 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2267 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2268
2269 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2270
2271 }
2272 // LSTM Layer
2273 else if (layerName == "LSTMLayer") {
2274
2275 // read RNN layer info
2276 size_t stateSize,inputSize, timeSteps = 0;
2277 int rememberState, returnSequence = 0;
2278 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2279 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2280 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2281 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2282 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2283
2284 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2285
2286 }
2287 // GRU Layer
2288 else if (layerName == "GRULayer") {
2289
2290 // read RNN layer info
2291 size_t stateSize,inputSize, timeSteps = 0;
2292 int rememberState, returnSequence, resetGateAfter = 0;
2293 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2294 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2295 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2296 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2297 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2298 gTools().ReadAttr(layerXML, "ResetGateAfter", resetGateAfter);
2299
2300 if (!resetGateAfter && ArchitectureImpl_t::IsCudnn())
2301 Warning("ReadWeightsFromXML",
2302 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2303
2304 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2305 }
2306 // BatchNorm Layer
2307 else if (layerName == "BatchNormLayer") {
2308 // use some dammy value which will be overwrittem in BatchNormLayer::ReadWeightsFromXML
2309 fNet->AddBatchNormLayer(0., 0.0);
2310 }
2311 // read weights and biases
2312 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2313
2314 // read next layer
2315 layerXML = gTools().GetNextChild(layerXML);
2316 }
2317
2318 fBuildNet = false;
2319 // create now the input and output matrices
2320 //int n1 = batchHeight;
2321 //int n2 = batchWidth;
2322 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2323 //if (fXInput.size() > 0) fXInput.clear();
2324 //fXInput.emplace_back(MatrixImpl_t(n1,n2));
2326 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
2327 // make here a ColumnMajor tensor
2330
2331 // create pointer to output matrix used for the predictions
2332 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2333
2334
2335}
2336
2337
2338////////////////////////////////////////////////////////////////////////////////
2339void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
2340{
2341}
2342
2343////////////////////////////////////////////////////////////////////////////////
2345{
2346 // TODO
2347 return NULL;
2348}
2349
2350////////////////////////////////////////////////////////////////////////////////
2352{
2353 // TODO
2354}
2355
2356} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define e(i)
Definition: RSha256.hxx:103
unsigned int UInt_t
Definition: RtypesCore.h:46
const Bool_t kFALSE
Definition: RtypesCore.h:92
double Double_t
Definition: RtypesCore.h:59
long long Long64_t
Definition: RtypesCore.h:73
const Bool_t kTRUE
Definition: RtypesCore.h:91
#define ClassImp(name)
Definition: Rtypes.h:364
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:120
int type
Definition: TGX11.cxx:121
char * Form(const char *fmt,...)
The Formula class.
Definition: TFormula.h:87
Double_t Eval(Double_t x) const
Sets first variable (e.g. x) and evaluate formula.
Definition: TFormula.cxx:3305
UInt_t GetNCpu()
Definition: Config.h:72
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Definition: Configurable.h:168
MsgLogger & Log() const
Definition: Configurable.h:122
Adadelta Optimizer class.
Definition: Adadelta.h:45
Adagrad Optimizer class.
Definition: Adagrad.h:45
Adam Optimizer class.
Definition: Adam.h:45
static void PrintTensor(const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false)
Definition: Cpu.h:862
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition: Cpu.h:108
static bool IsCudnn()
Definition: Cpu.h:131
Generic Deep Neural Network class.
Definition: DeepNet.h:73
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition: DeepNet.h:825
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:608
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:740
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:567
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:485
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:439
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:773
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:524
Generic layer class.
Definition: DenseLayer.h:59
RMSProp Optimizer class.
Definition: RMSProp.h:45
Stochastic Batch Gradient Descent Optimizer class.
Definition: SGD.h:46
Generic General Layer class.
Definition: GeneralLayer.h:51
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:395
Class that contains all the data information.
Definition: DataSetInfo.h:62
UInt_t GetNClasses() const
Definition: DataSetInfo.h:155
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:206
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:88
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:359
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:308
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:319
UInt_t GetClass() const
Definition: Event.h:86
std::vector< Float_t > & GetValues()
Definition: Event.h:94
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
const char * GetName() const
Definition: MethodBase.h:333
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:684
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
Definition: MethodBase.h:345
const TString & GetMethodName() const
Definition: MethodBase.h:330
const Event * GetEvent() const
Definition: MethodBase.h:749
DataSetInfo & DataInfo() const
Definition: MethodBase.h:409
UInt_t GetNVariables() const
Definition: MethodBase.h:344
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:593
UInt_t GetNvar() const
Definition: MethodBase.h:343
TrainingHistory fTrainHistory
Definition: MethodBase.h:425
DataSet * Data() const
Definition: MethodBase.h:408
IPythonInteractive * fInteractive
Definition: MethodBase.h:446
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition: MethodDL.h:108
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition: MethodDL.h:179
void GetHelpMessage() const
Definition: MethodDL.cxx:2351
DNN::ELossFunction fLossFunction
The loss function.
Definition: MethodDL.h:186
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDL.cxx:1997
std::vector< size_t > fInputShape
Contains the batch size (no.
Definition: MethodDL.h:174
TString fLayoutString
The string defining the layout of the deep net.
Definition: MethodDL.h:190
void SetInputDepth(int inputDepth)
Setters.
Definition: MethodDL.h:282
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:204
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1661
size_t GetBatchHeight() const
Definition: MethodDL.h:259
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
Definition: MethodDL.cxx:2023
TString fWeightInitializationString
The string defining the weight initialization method.
Definition: MethodDL.h:193
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:770
TensorImpl_t fXInput
Definition: MethodDL.h:202
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:182
TString fArchitectureString
The string defining the architecure: CPU or GPU.
Definition: MethodDL.h:194
void Init()
default initializations
Definition: MethodDL.cxx:432
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:1021
void TrainDeepNet()
train of deep neural network using the defined architecture
Definition: MethodDL.cxx:1166
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:276
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:265
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:583
UInt_t GetNumValidationSamples()
parce the validation string and return the number of event data used for validation
TString GetBatchLayoutString() const
Definition: MethodDL.h:269
void SetInputWidth(int inputWidth)
Definition: MethodDL.h:284
void ProcessOptions()
Definition: MethodDL.cxx:219
HostBufferImpl_t fXInputBuffer
Definition: MethodDL.h:203
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition: MethodDL.h:180
size_t GetInputDepth() const
Definition: MethodDL.h:251
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDL.cxx:1959
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:205
TString GetInputLayoutString() const
Definition: MethodDL.h:268
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:289
size_t GetInputHeight() const
Definition: MethodDL.h:252
TString GetArchitectureString() const
Definition: MethodDL.h:274
void ParseBatchLayout()
Parse the input layout.
Definition: MethodDL.cxx:482
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:892
void ReadWeightsFromStream(std::istream &)
Definition: MethodDL.cxx:2339
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:2121
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition: MethodDL.h:195
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:93
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:185
DNN::EInitialization fWeightInitialization
The initialization method.
Definition: MethodDL.h:184
size_t GetBatchDepth() const
Definition: MethodDL.h:258
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate rnn layer.
Definition: MethodDL.cxx:933
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:200
size_t GetInputWidth() const
Definition: MethodDL.h:253
void SetInputShape(std::vector< size_t > inputShape)
Definition: MethodDL.h:285
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:266
TString fBatchLayoutString
The string defining the layout of the batch.
Definition: MethodDL.h:189
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:1093
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:671
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:831
TString fTrainingStrategyString
The string defining the training strategy.
Definition: MethodDL.h:192
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2344
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition: MethodDL.h:110
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:288
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:1054
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:290
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1829
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:264
void SetBatchSize(size_t batchSize)
Definition: MethodDL.h:287
TString GetLayoutString() const
Definition: MethodDL.h:270
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition: MethodDL.h:178
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition: MethodDL.h:106
size_t GetBatchWidth() const
Definition: MethodDL.h:260
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:2060
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:107
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDL.cxx:1707
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:1047
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:439
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:197
void SetInputHeight(int inputHeight)
Definition: MethodDL.h:283
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling the ProcesOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:529
TString fErrorStrategy
The string defining the error strategy for training.
Definition: MethodDL.h:191
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:167
TString fInputLayoutString
The string defining the layout of the input.
Definition: MethodDL.h:188
EMsgType GetMinType() const
Definition: MsgLogger.h:71
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:146
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TXMLEngine & xmlengine()
Definition: Tools.h:268
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:353
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1235
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
Definition: Types.h:73
EAnalysisType
Definition: Types.h:128
@ kMulticlass
Definition: Types.h:131
@ kClassification
Definition: Types.h:129
@ kRegression
Definition: Types.h:130
@ kTraining
Definition: Types.h:145
void Print(Option_t *name="") const
Print the matrix as a table of elements.
TMatrixT.
Definition: TMatrixT.h:39
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Definition: TNamed.cxx:128
An array of TObjects.
Definition: TObjArray.h:37
Collectable string class.
Definition: TObjString.h:28
const TString & GetString() const
Definition: TObjString.h:46
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:879
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:893
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:552
Basic string class.
Definition: TString.h:136
Ssiz_t Length() const
Definition: TString.h:410
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1941
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1126
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:2007
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1811
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:519
const char * Data() const
Definition: TString.h:369
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:692
@ kTrailing
Definition: TString.h:267
@ kBoth
Definition: TString.h:267
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1158
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2217
Bool_t IsNull() const
Definition: TString.h:407
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2331
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:712
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
const Int_t n
Definition: legend1.C:16
#define I(x, y, z)
double T(double x)
Definition: ChebyshevPol.h:34
static const std::string separator("@@@")
EInitialization
Definition: Functions.h:72
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:82
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:238
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:57
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDL.cxx:75
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Bool_t IsNaN(Double_t x)
Definition: TMath.h:892
Double_t Log(Double_t x)
Definition: TMath.h:760
All of the options that can be specified in the training string.
Definition: MethodDL.h:72
std::map< TString, double > optimizerParams
Definition: MethodDL.h:84
DNN::EOptimizer optimizer
Definition: MethodDL.h:78
DNN::ERegularization regularization
Definition: MethodDL.h:77
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:83
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
auto * t1
Definition: textangle.C:20
REAL epsilon
Definition: triangle.c:617
static void output(int code)
Definition: gifencode.c:226