Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
MethodDL.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/cnn:$Id$
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (see tmva/doc/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31#include "TObjString.h"
32
33#include "TMVA/Tools.h"
34#include "TMVA/Configurable.h"
35#include "TMVA/IMethod.h"
37#include "TMVA/MethodDL.h"
38#include "TMVA/Types.h"
40#include "TMVA/DNN/Functions.h"
42#include "TMVA/DNN/SGD.h"
43#include "TMVA/DNN/Adam.h"
44#include "TMVA/DNN/Adagrad.h"
45#include "TMVA/DNN/RMSProp.h"
46#include "TMVA/DNN/Adadelta.h"
47#include "TMVA/Timer.h"
48
49#ifdef R__HAS_TMVAGPU
51#ifdef R__HAS_CUDNN
53#endif
54#endif
55
56#include <chrono>
57
59
60using namespace TMVA::DNN::CNN;
61using namespace TMVA::DNN;
62
68
69
70namespace TMVA {
71
72
73////////////////////////////////////////////////////////////////////////////////
74TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
75{
76 key.ToUpper();
77 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
78 if (it == keyValueMap.end()) {
79 return TString("");
80 }
81 return it->second;
82}
83
84////////////////////////////////////////////////////////////////////////////////
/// Generic typed option fetch, returning `defaultValue` when `key` is absent.
/// Only declared here; the specializations below (int, double, TString, bool,
/// std::vector<double>) provide the definitions.
85template <typename T>
86T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
87
88////////////////////////////////////////////////////////////////////////////////
89template <>
90int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
91{
92 TString value(fetchValueTmp(keyValueMap, key));
93 if (value == "") {
94 return defaultValue;
95 }
96 return value.Atoi();
97}
98
99////////////////////////////////////////////////////////////////////////////////
100template <>
101double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
102{
103 TString value(fetchValueTmp(keyValueMap, key));
104 if (value == "") {
105 return defaultValue;
106 }
107 return value.Atof();
108}
109
110////////////////////////////////////////////////////////////////////////////////
111template <>
112TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
113{
114 TString value(fetchValueTmp(keyValueMap, key));
115 if (value == "") {
116 return defaultValue;
117 }
118 return value;
119}
120
121////////////////////////////////////////////////////////////////////////////////
122template <>
123bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
124{
125 TString value(fetchValueTmp(keyValueMap, key));
126 if (value == "") {
127 return defaultValue;
128 }
129
130 value.ToUpper();
131 if (value == "TRUE" || value == "T" || value == "1") {
132 return true;
133 }
134
135 return false;
136}
137
138////////////////////////////////////////////////////////////////////////////////
139template <>
140std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
141 std::vector<double> defaultValue)
142{
143 TString parseString(fetchValueTmp(keyValueMap, key));
144 if (parseString == "") {
145 return defaultValue;
146 }
147
148 parseString.ToUpper();
149 std::vector<double> values;
150
151 const TString tokenDelim("+");
152 TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
153 TIter nextToken(tokenStrings);
154 TObjString *tokenString = (TObjString *)nextToken();
155 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
156 std::stringstream sstr;
157 double currentValue;
158 sstr << tokenString->GetString().Data();
159 sstr >> currentValue;
160 values.push_back(currentValue);
161 }
162 return values;
163}
164
165////////////////////////////////////////////////////////////////////////////////
/// Declare the user-configurable options of the DL method and register their
/// default values and allowed choices with the Configurable machinery.
/// NOTE(review): the function signature line (presumably
/// `void MethodDL::DeclareOptions()`) was lost when this listing was
/// extracted — confirm against the repository source.
167{
168 // Set default values for all option strings
169
170 DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
171
172 DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
173
174 DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
175
176 DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
177 " or cross entropy (binary classification).");
178 AddPreDefVal(TString("CROSSENTROPY"));
179 AddPreDefVal(TString("SUMOFSQUARES"));
180 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
181
182 DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
183 AddPreDefVal(TString("XAVIER"));
184 AddPreDefVal(TString("XAVIERUNIFORM"));
185 AddPreDefVal(TString("GAUSS"));
186 AddPreDefVal(TString("UNIFORM"));
187 AddPreDefVal(TString("IDENTITY"));
188 AddPreDefVal(TString("ZERO"));
189
190 DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
191
192 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
193 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
194 "Specify as 100 to use exactly 100 events. (Default: 20%)");
195
196 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
197 AddPreDefVal(TString("STANDARD")); // deprecated and not supported anymore
198 AddPreDefVal(TString("CPU"));
199 AddPreDefVal(TString("GPU"));
200 AddPreDefVal(TString("OPENCL")); // not yet implemented
201 AddPreDefVal(TString("CUDNN")); // not needed (by default GPU is now CUDNN if available)
202
203 // define training strategy separated by a separator "|"
204 DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-3,"
205 "Momentum=0.0,"
206 "ConvergenceSteps=100,"
207 "MaxEpochs=2000,"
208 "Optimizer=ADAM,"
209 "BatchSize=30,"
210 "TestRepetitions=1,"
211 "WeightDecay=0.0,"
212 "Regularization=None,"
213 "DropConfig=0.0",
214 "TrainingStrategy", "Defines the training strategies.");
215}
216
217////////////////////////////////////////////////////////////////////////////////
/// Parse the options set by the user and configure the method state:
/// architecture selection/fallback, loss function, weight initialization,
/// per-strategy training settings, and default input/batch shapes.
/// NOTE(review): this listing was extracted from generated documentation and
/// several statement lines are missing (the signature, the condition before
/// the first Log() call, and the SetLossFunction/SetWeightInitialization/
/// settings.regularization/settings.optimizer assignments inside the branches
/// below) — compare with the repository source before relying on it.
219{
220
222 Log() << kINFO << "Will ignore negative events in training!" << Endl;
223 }
224
225 if (fArchitectureString == "STANDARD") {
226 Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
// NOTE(review): the message below says "Architecture=CPU or Architecture=CPU"
// — the second one should presumably read GPU; the string is left untouched
// here and should be fixed in the real source.
227 "Please use Architecture=CPU or Architecture=CPU."
228 "See the TMVA Users' Guide for instructions if you "
229 "encounter problems."
230 << Endl;
231 Log() << kINFO << "We will use instead the CPU architecture" << Endl;
232 fArchitectureString = "CPU";
233 }
234 if (fArchitectureString == "OPENCL") {
235 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
236 "Please use Architecture=CPU or Architecture=CPU for the "
237 "time being. See the TMVA Users' Guide for instructions "
238 "if you encounter problems."
239 << Endl;
240 // use instead GPU
241 Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
242 fArchitectureString = "GPU";
243 }
244
245 // the architecture can now be set at runtime as an option
246
247
248 if (fArchitectureString == "GPU" || fArchitectureString == "CUDNN") {
249#ifdef R__HAS_TMVAGPU
250 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
251#else // case TMVA does not support GPU
252 Log() << kERROR << "CUDA backend not enabled. Please make sure "
253 "you have CUDA installed and it was successfully "
254 "detected by CMAKE by using -Dtmva-gpu=On "
255 << Endl;
256 fArchitectureString = "CPU";
257 Log() << kINFO << "Will now use instead the CPU architecture !" << Endl;
258#endif
259 }
260
261 if (fArchitectureString == "CPU") {
262#ifdef R__HAS_TMVACPU // TMVA has CPU BLAS and IMT support
263 Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support !" << Endl;
264#else // TMVA has no CPU BLAS or IMT support
265 Log() << kINFO << "Multi-core CPU backend not enabled. For better performances, make sure "
266 "you have a BLAS implementation and it was successfully "
267 "detected by CMake as well that the imt CMake flag is set."
268 << Endl;
269 Log() << kINFO << "Will use anyway the CPU architecture but with slower performance" << Endl;
270#endif
271 }
272
273 // Input Layout
276
277 // Loss function and output.
// NOTE(review): loss-function assignment lines are missing from this branch
// group (only the conditions survived extraction).
280 if (fErrorStrategy == "SUMOFSQUARES") {
282 }
283 if (fErrorStrategy == "CROSSENTROPY") {
285 }
287 } else if (fAnalysisType == Types::kRegression) {
288 if (fErrorStrategy != "SUMOFSQUARES") {
289 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
290 << " neural net error function. Setting error function to "
291 << " SUMOFSQUARES now." << Endl;
292 }
293
296 } else if (fAnalysisType == Types::kMulticlass) {
297 if (fErrorStrategy == "SUMOFSQUARES") {
299 }
300 if (fErrorStrategy == "CROSSENTROPY") {
302 }
303 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
305 }
307 }
308
309 // Initialization
310 // the biases will be always initialized to zero
// NOTE(review): the SetWeightInitialization(...) calls inside each branch
// were lost in extraction — only the string comparisons remain.
311 if (fWeightInitializationString == "XAVIER") {
313 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
315 } else if (fWeightInitializationString == "GAUSS") {
317 } else if (fWeightInitializationString == "UNIFORM") {
319 } else if (fWeightInitializationString == "ZERO") {
321 } else if (fWeightInitializationString == "IDENTITY") {
323 } else {
325 }
326
327 // Training settings.
328
// Each "|"-separated block of the TrainingStrategy option becomes one
// TTrainingSettings entry, with per-key defaults applied via fetchValueTmp.
330 for (auto &block : strategyKeyValues) {
331 TTrainingSettings settings;
332
333 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
334 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
335 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
336 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
337 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
338 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
339 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
340 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
341
342 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
343 if (regularization == "L1") {
345 } else if (regularization == "L2") {
347 } else {
349 }
350
351 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
352 settings.optimizerName = optimizer;
353 if (optimizer == "SGD") {
355 } else if (optimizer == "ADAM") {
357 } else if (optimizer == "ADAGRAD") {
359 } else if (optimizer == "RMSPROP") {
361 } else if (optimizer == "ADADELTA") {
363 } else {
364 // Make Adam as default choice if the input string is
365 // incorrect.
367 settings.optimizerName = "ADAM";
368 }
369 // check for specific optimizer parameters
370 std::vector<TString> optimParamLabels = {"_beta1", "_beta2", "_eps", "_rho"};
371 //default values
372 std::map<TString, double> defaultValues = {
373 {"ADADELTA_eps", 1.E-8}, {"ADADELTA_rho", 0.95},
374 {"ADAGRAD_eps", 1.E-8},
375 {"ADAM_beta1", 0.9}, {"ADAM_beta2", 0.999}, {"ADAM_eps", 1.E-7},
376 {"RMSPROP_eps", 1.E-7}, {"RMSPROP_rho", 0.9},
377 };
378 for (auto &pN : optimParamLabels) {
379 TString optimParamName = settings.optimizerName + pN;
380 // check if optimizer has default values for this specific parameters
381 if (defaultValues.count(optimParamName) > 0) {
382 double defValue = defaultValues[optimParamName];
383 double val = fetchValueTmp(block, optimParamName, defValue);
384 // create entry in settings for this optimizer parameter
385 settings.optimizerParams[optimParamName] = val;
386 }
387 }
388
389 fTrainingSettings.push_back(settings);
390 }
391
392 // this set fInputShape[0] = batchSize
393 this->SetBatchSize(fTrainingSettings.front().batchSize);
394
395 // case inputlayout and batch layout was not given. Use default then
396 // (1, batchsize, nvariables)
397 // fInputShape[0] -> BatchSize
398 // fInputShape[1] -> InputDepth
399 // fInputShape[2] -> InputHeight
400 // fInputShape[3] -> InputWidth
401 if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
402 fInputShape[1] = 1;
403 fInputShape[2] = 1;
// NOTE(review): the line assigning fInputShape[3] (presumably the number of
// input variables) is missing from this extracted listing.
405 }
406 // case when batch layout is not provided (all zero)
407 // batch layout can be determined by the input layout + batch size
408 // case DNN : { 1, B, W }
409 // case CNN : { B, C, H*W}
410 // case RNN : { B, T, H*W }
411
412 if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
413 // case first layer is DENSE
414 if (fInputShape[2] == 1 && fInputShape[1] == 1) {
415 // case of (1, batchsize, input features)
416 fBatchDepth = 1;
417 fBatchHeight = fTrainingSettings.front().batchSize;
// NOTE(review): the fBatchWidth assignment for the DENSE case is missing
// from this extracted listing.
419 }
420 else { // more general cases (e.g. for CNN)
421 // case CONV or RNN
422 fBatchDepth = fTrainingSettings.front().batchSize;
// NOTE(review): the fBatchHeight/fBatchWidth assignments for this branch
// are missing from this extracted listing.
425 }
426 }
427}
428
429////////////////////////////////////////////////////////////////////////////////
430/// default initializations
/// Intentionally empty: all state is set up in the constructor and in
/// ProcessOptions(). NOTE(review): the signature line (presumably
/// `void MethodDL::Init()`) was lost in extraction of this listing.
432{
433 // Nothing to do here
434}
435
436////////////////////////////////////////////////////////////////////////////////
437/// Parse the input layout
/// Splits the "|"-separated InputLayout option into a shape vector
/// {batch, depth, height, width}; a leading 0 placeholder for the batch size
/// is filled in later from the training settings, and 2- or 3-token layouts
/// are padded with 1's to always produce 4 dimensions.
/// NOTE(review): the signature line was lost in extraction; also note the
/// TObjArray returned by Tokenize() below is never deleted (leak) — fix in
/// the real source.
439{
440 // Define the delimiter
441 const TString delim("|");
442
443 // Get the input layout string
444 TString inputLayoutString = this->GetInputLayoutString();
445
446 // Split the input layout string
447 TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
448 TIter nextInputDim(inputDimStrings);
449 TObjString *inputDimString = (TObjString *)nextInputDim();
450
451 // Go through every token and save its absolute value in the shape array
452 // The first token is the batch size for easy compatibility with cudnn
453 int subDim = 1;
454 std::vector<size_t> inputShape;
455 inputShape.reserve(inputLayoutString.Length()/2 + 2);
456 inputShape.push_back(0); // Will be set later by Trainingsettings, use 0 value now
457 for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
458 // size_t is unsigned
459 subDim = (size_t) abs(inputDimString->GetString().Atoi());
460 // Size among unused dimensions should be set to 1 for cudnn
461 //if (subDim == 0) subDim = 1;
462 inputShape.push_back(subDim);
463 }
464 // it is expected that empty Shape has at least 4 dimensions. We pad the missing one's with 1
465 // for example in case of dense layer input layouts
466 // when we will support 3D convolutions we would need to add extra 1's
467 if (inputShape.size() == 2) {
468 // case of dense layer where only width is specified
469 inputShape = {inputShape[0], 1, 1, inputShape[1]};
470 }
471 else if (inputShape.size() == 3) {
472 //e.g. case of RNN T,W -> T,1,W
473 inputShape = {inputShape[0], inputShape[1], 1, inputShape[2]};
474 }
475
476 this->SetInputShape(inputShape);
477}
478
479////////////////////////////////////////////////////////////////////////////////
480/// Parse the batch layout
/// Splits the "|"-separated BatchLayout option into batch depth, height and
/// width and stores them on the method. NOTE(review): the signature line was
/// lost in extraction; the TObjArray returned by Tokenize() is never deleted.
482{
483 // Define the delimiter
484 const TString delim("|");
485
486 // Get the batch layout string
487 TString batchLayoutString = this->GetBatchLayoutString();
488
489 size_t batchDepth = 0;
490 size_t batchHeight = 0;
491 size_t batchWidth = 0;
492
493 // Split the batch layout string
494 TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
495 TIter nextBatchDim(batchDimStrings);
496 TObjString *batchDimString = (TObjString *)nextBatchDim();
497 int idxToken = 0;
498
499 for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
500 switch (idxToken) {
501 case 0: // batch depth
502 {
503 TString strDepth(batchDimString->GetString());
504 batchDepth = (size_t)strDepth.Atoi();
505 } break;
506 case 1: // batch height
507 {
508 TString strHeight(batchDimString->GetString());
509 batchHeight = (size_t)strHeight.Atoi();
510 } break;
511 case 2: // batch width
512 {
513 TString strWidth(batchDimString->GetString());
514 batchWidth = (size_t)strWidth.Atoi();
515 } break;
516 }
517 ++idxToken;
518 }
519
520 this->SetBatchDepth(batchDepth);
521 this->SetBatchHeight(batchHeight);
522 this->SetBatchWidth(batchWidth);
523}
524
525////////////////////////////////////////////////////////////////////////////////
526/// Create a deep net based on the layout string
/// Tokenizes the Layout option on "," (one token per layer) and dispatches
/// each layer specification to the matching Parse*Layer helper based on its
/// first "|"-separated field; an unrecognized type falls back to a dense
/// layer for compatibility with the old DNN interface.
/// NOTE(review): the signature line (template method taking `deepNet` and
/// `nets`) was lost in extraction; Tokenize() results here are never deleted.
527template <typename Architecture_t, typename Layer_t>
530{
531 // Layer specification, layer details
532 const TString layerDelimiter(",");
533 const TString subDelimiter("|");
534
535 TString layoutString = this->GetLayoutString();
536
537 //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
538
539 // Split layers
540 TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
541 TIter nextLayer(layerStrings);
542 TObjString *layerString = (TObjString *)nextLayer();
543
544
545 for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
546
547 // Split layer details
548 TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
549 TIter nextToken(subStrings);
550 TObjString *token = (TObjString *)nextToken();
551
552 // Determine the type of the layer
553 TString strLayerType = token->GetString();
554
555
556 if (strLayerType == "DENSE") {
557 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
558 } else if (strLayerType == "CONV") {
559 ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
560 } else if (strLayerType == "MAXPOOL") {
561 ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
562 } else if (strLayerType == "RESHAPE") {
563 ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
564 } else if (strLayerType == "BNORM") {
565 ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
566 } else if (strLayerType == "RNN") {
567 ParseRecurrentLayer(kLayerRNN, deepNet, nets, layerString->GetString(), subDelimiter);
568 } else if (strLayerType == "LSTM") {
569 ParseRecurrentLayer(kLayerLSTM, deepNet, nets, layerString->GetString(), subDelimiter);
570 } else if (strLayerType == "GRU") {
571 ParseRecurrentLayer(kLayerGRU, deepNet, nets, layerString->GetString(), subDelimiter);
572 } else {
573 // no type of layer specified - assume is dense layer as in old DNN interface
574 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
575 }
576 }
577}
578
579////////////////////////////////////////////////////////////////////////////////
580/// Parses the layer string and creates the appropriate dense layer
/// Accepts width and activation in either order (e.g. "100|TANH" or
/// "TANH|100"); the width may be a TFormula expression in N (number of input
/// variables). A zero width defaults to the output size of the analysis type.
/// NOTE(review): the first signature line and the `activationFunction`
/// declaration (with its default) were lost in extraction of this listing.
581template <typename Architecture_t, typename Layer_t>
583 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
584 TString delim)
585{
586 int width = 0;
588
589 // this return number of input variables for the method
590 // it can be used to deduce width of dense layer if specified as N+10
591 // where N is the number of input variables
592 const size_t inputSize = GetNvar();
593
594 // Split layer details
595 TObjArray *subStrings = layerString.Tokenize(delim);
596 TIter nextToken(subStrings);
597 TObjString *token = (TObjString *)nextToken();
598
599 // loop on the tokens
600 // order of specifying width and activation function is not relevant
601 // both 100|TANH and TANH|100 are valid cases
602 for (; token != nullptr; token = (TObjString *)nextToken()) {
603 // try a match with the activation function
604 TString strActFnc(token->GetString());
605 // if first token defines the layer type- skip it
606 if (strActFnc =="DENSE") continue;
607
608 if (strActFnc == "RELU") {
609 activationFunction = DNN::EActivationFunction::kRelu;
610 } else if (strActFnc == "TANH") {
611 activationFunction = DNN::EActivationFunction::kTanh;
612 } else if (strActFnc == "FTANH") {
613 activationFunction = DNN::EActivationFunction::kFastTanh;
614 } else if (strActFnc == "SYMMRELU") {
615 activationFunction = DNN::EActivationFunction::kSymmRelu;
616 } else if (strActFnc == "SOFTSIGN") {
617 activationFunction = DNN::EActivationFunction::kSoftSign;
618 } else if (strActFnc == "SIGMOID") {
619 activationFunction = DNN::EActivationFunction::kSigmoid;
620 } else if (strActFnc == "LINEAR") {
621 activationFunction = DNN::EActivationFunction::kIdentity;
622 } else if (strActFnc == "GAUSS") {
623 activationFunction = DNN::EActivationFunction::kGauss;
624 } else if (width == 0) {
625 // no match found try to parse as text showing the width
626 // support for input a formula where the variable 'x' is 'N' in the string
627 // use TFormula for the evaluation
628 TString strNumNodes = strActFnc;
629 // number of nodes
630 TString strN("x");
631 strNumNodes.ReplaceAll("N", strN);
632 strNumNodes.ReplaceAll("n", strN);
633 TFormula fml("tmp", strNumNodes);
634 width = fml.Eval(inputSize);
635 }
636 }
637 // avoid zero width. assume is last layer and give width = output width
638 // Determine the number of outputs
// NOTE(review): the condition opening this branch group (presumably on
// fAnalysisType == Types::kRegression) is missing from this listing.
639 size_t outputSize = 1;
641 outputSize = GetNTargets();
642 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
643 outputSize = DataInfo().GetNClasses();
644 }
645 if (width == 0) width = outputSize;
646
647 // Add the dense layer, initialize the weights and biases and copy
648 TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
649 denseLayer->Initialize();
650
651 // add same layer to fNet
652 if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
653
654 //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
655
656 // add the copy to all slave nets
657 //for (size_t i = 0; i < nets.size(); i++) {
658 // nets[i].AddDenseLayer(copyDenseLayer);
659 //}
660
661 // check compatibility of added layer
662 // for a dense layer input should be 1 x 1 x DxHxW
663}
664
665////////////////////////////////////////////////////////////////////////////////
666/// Parses the layer string and creates the appropriate convolutional layer
/// Expects positional "|"-separated fields: CONV|depth|fltH|fltW|strideR|
/// strideC|padH|padW|activation. NOTE(review): the first signature line and
/// the `activationFunction` declaration were lost in extraction.
667template <typename Architecture_t, typename Layer_t>
669 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
670 TString delim)
671{
672 int depth = 0;
673 int fltHeight = 0;
674 int fltWidth = 0;
675 int strideRows = 0;
676 int strideCols = 0;
677 int zeroPadHeight = 0;
678 int zeroPadWidth = 0;
680
681 // Split layer details
682 TObjArray *subStrings = layerString.Tokenize(delim);
683 TIter nextToken(subStrings);
684 TObjString *token = (TObjString *)nextToken();
685 int idxToken = 0;
686
687 for (; token != nullptr; token = (TObjString *)nextToken()) {
688 switch (idxToken) {
689 case 1: // depth
690 {
691 TString strDepth(token->GetString());
692 depth = strDepth.Atoi();
693 } break;
694 case 2: // filter height
695 {
696 TString strFltHeight(token->GetString());
697 fltHeight = strFltHeight.Atoi();
698 } break;
699 case 3: // filter width
700 {
701 TString strFltWidth(token->GetString());
702 fltWidth = strFltWidth.Atoi();
703 } break;
704 case 4: // stride in rows
705 {
706 TString strStrideRows(token->GetString());
707 strideRows = strStrideRows.Atoi();
708 } break;
709 case 5: // stride in cols
710 {
711 TString strStrideCols(token->GetString());
712 strideCols = strStrideCols.Atoi();
713 } break;
714 case 6: // zero padding height
715 {
716 TString strZeroPadHeight(token->GetString());
717 zeroPadHeight = strZeroPadHeight.Atoi();
718 } break;
719 case 7: // zero padding width
720 {
721 TString strZeroPadWidth(token->GetString());
722 zeroPadWidth = strZeroPadWidth.Atoi();
723 } break;
724 case 8: // activation function
725 {
726 TString strActFnc(token->GetString());
727 if (strActFnc == "RELU") {
728 activationFunction = DNN::EActivationFunction::kRelu;
729 } else if (strActFnc == "TANH") {
730 activationFunction = DNN::EActivationFunction::kTanh;
731 } else if (strActFnc == "SYMMRELU") {
732 activationFunction = DNN::EActivationFunction::kSymmRelu;
733 } else if (strActFnc == "SOFTSIGN") {
734 activationFunction = DNN::EActivationFunction::kSoftSign;
735 } else if (strActFnc == "SIGMOID") {
736 activationFunction = DNN::EActivationFunction::kSigmoid;
737 } else if (strActFnc == "LINEAR") {
738 activationFunction = DNN::EActivationFunction::kIdentity;
739 } else if (strActFnc == "GAUSS") {
740 activationFunction = DNN::EActivationFunction::kGauss;
741 }
742 } break;
743 }
744 ++idxToken;
745 }
746
747 // Add the convolutional layer, initialize the weights and biases and copy
748 TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
749 zeroPadHeight, zeroPadWidth, activationFunction);
750 convLayer->Initialize();
751
752 // Add same layer to fNet
753 if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
754 zeroPadHeight, zeroPadWidth, activationFunction);
755
756 //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
757
758 //// add the copy to all slave nets
759 //for (size_t i = 0; i < nets.size(); i++) {
760 // nets[i].AddConvLayer(copyConvLayer);
761 //}
762}
763
764////////////////////////////////////////////////////////////////////////////////
765/// Parses the layer string and creates the appropriate max pool layer
/// Expects positional "|"-separated fields: MAXPOOL|fltH|fltW|strideR|strideC.
/// NOTE(review): the first signature line was lost in extraction.
766template <typename Architecture_t, typename Layer_t>
768 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
769 TString delim)
770{
771
772 int filterHeight = 0;
773 int filterWidth = 0;
774 int strideRows = 0;
775 int strideCols = 0;
776
777 // Split layer details
778 TObjArray *subStrings = layerString.Tokenize(delim);
779 TIter nextToken(subStrings);
780 TObjString *token = (TObjString *)nextToken();
781 int idxToken = 0;
782
783 for (; token != nullptr; token = (TObjString *)nextToken()) {
784 switch (idxToken) {
785 case 1: // filter height
786 {
787 TString strFrmHeight(token->GetString());
788 filterHeight = strFrmHeight.Atoi();
789 } break;
790 case 2: // filter width
791 {
792 TString strFrmWidth(token->GetString());
793 filterWidth = strFrmWidth.Atoi();
794 } break;
795 case 3: // stride in rows
796 {
797 TString strStrideRows(token->GetString());
798 strideRows = strStrideRows.Atoi();
799 } break;
800 case 4: // stride in cols
801 {
802 TString strStrideCols(token->GetString());
803 strideCols = strStrideCols.Atoi();
804 } break;
805 }
806 ++idxToken;
807 }
808
809 // Add the Max pooling layer
810 // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
811 deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
812
813 // Add the same layer to fNet
814 if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
815
816
817 //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
818
819 //// add the copy to all slave nets
820 //for (size_t i = 0; i < nets.size(); i++) {
821 // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
822 //}
823}
824
825////////////////////////////////////////////////////////////////////////////////
826/// Parses the layer string and creates the appropriate reshape layer
/// Expects RESHAPE|depth|height|width, or RESHAPE|FLAT to request flattening
/// (a "FLAT" token jumps directly to the flattening case). NOTE(review): the
/// first signature line was lost in extraction.
827template <typename Architecture_t, typename Layer_t>
829 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
830 TString delim)
831{
832 int depth = 0;
833 int height = 0;
834 int width = 0;
835 bool flattening = false;
836
837 // Split layer details
838 TObjArray *subStrings = layerString.Tokenize(delim);
839 TIter nextToken(subStrings);
840 TObjString *token = (TObjString *)nextToken();
841 int idxToken = 0;
842
843 for (; token != nullptr; token = (TObjString *)nextToken()) {
844 if (token->GetString() == "FLAT") idxToken=4;
845 switch (idxToken) {
846 case 1: {
847 TString strDepth(token->GetString());
848 depth = strDepth.Atoi();
849 } break;
850 case 2: // height
851 {
852 TString strHeight(token->GetString());
853 height = strHeight.Atoi();
854 } break;
855 case 3: // width
856 {
857 TString strWidth(token->GetString());
858 width = strWidth.Atoi();
859 } break;
860 case 4: // flattening
861 {
862 TString flat(token->GetString());
863 if (flat == "FLAT") {
864 flattening = true;
865 }
866 } break;
867 }
868 ++idxToken;
869 }
870
871 // Add the reshape layer
872 // TReshapeLayer<Architecture_t> *reshapeLayer =
873 deepNet.AddReshapeLayer(depth, height, width, flattening);
874
875 // Add the same layer to fNet
876 if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
877
878 //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
879
880 //// add the copy to all slave nets
881 //for (size_t i = 0; i < nets.size(); i++) {
882 // nets[i].AddReshapeLayer(copyReshapeLayer);
883 //}
884}
885
886////////////////////////////////////////////////////////////////////////////////
887/// Parses the layer string and creates a batch normalization layer
/// Expects BNORM|momentum|epsilon; defaults are momentum = -1 and
/// epsilon = 0.0001. NOTE(review): the first signature line was lost in
/// extraction of this listing.
888template <typename Architecture_t, typename Layer_t>
890 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
891 TString delim)
892{
893
894 // default values
895 double momentum = -1; //0.99;
896 double epsilon = 0.0001;
897
898 // Split layer details
899 TObjArray *subStrings = layerString.Tokenize(delim);
900 TIter nextToken(subStrings);
901 TObjString *token = (TObjString *)nextToken();
902 int idxToken = 0;
903
904 for (; token != nullptr; token = (TObjString *)nextToken()) {
905 switch (idxToken) {
906 case 1: {
907 momentum = std::atof(token->GetString().Data());
908 } break;
909 case 2: // epsilon
910 {
911 epsilon = std::atof(token->GetString().Data());
912 } break;
913 }
914 ++idxToken;
915 }
916
917 // Add the batch norm layer
918 //
919 auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
920 layer->Initialize();
921
922 // Add the same layer to fNet
923 if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
924
925}
926
927////////////////////////////////////////////////////////////////////////////////
928/// Parses the layer string and creates the appropriate rnn layer
/// Handles RNN, LSTM and GRU specifications of the form
/// TYPE|stateSize|inputSize|timeSteps|rememberState|returnSequence
/// (|resetGateAfter for GRU only). NOTE(review): the first signature line
/// (taking `rnnType`, `deepNet`, `nets`) was lost in extraction.
929template <typename Architecture_t, typename Layer_t>
931 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
932 TString delim)
933{
934 // int depth = 0;
935 int stateSize = 0;
936 int inputSize = 0;
937 int timeSteps = 0;
938 bool rememberState = false;
939 bool returnSequence = false;
940 bool resetGateAfter = false;
941
942 // Split layer details
943 TObjArray *subStrings = layerString.Tokenize(delim);
944 TIter nextToken(subStrings);
945 TObjString *token = (TObjString *)nextToken();
946 int idxToken = 0;
947
948 for (; token != nullptr; token = (TObjString *)nextToken()) {
949 switch (idxToken) {
950 case 1: // state size
951 {
952 TString strstateSize(token->GetString());
953 stateSize = strstateSize.Atoi();
954 break;
955 }
956 case 2: // input size
957 {
958 TString strinputSize(token->GetString());
959 inputSize = strinputSize.Atoi();
960 break;
961 }
962 case 3: // time steps
963 {
964 TString strtimeSteps(token->GetString());
965 timeSteps = strtimeSteps.Atoi();
966 break;
967 }
968 case 4: // rememberState (option stateful in Keras)
969 {
970 TString strrememberState(token->GetString());
971 rememberState = (bool) strrememberState.Atoi();
972 break;
973 }
974 case 5: // return full output sequence (1 or 0)
975 {
976 TString str(token->GetString());
977 returnSequence = (bool)str.Atoi();
978 break;
979 }
980 case 6: // resetGate after option (only for GRU)
981 {
982 TString str(token->GetString());
983 resetGateAfter = (bool)str.Atoi();
984 }
985 }
986 ++idxToken;
987 }
988
989 // Add the recurrent layer, initialize the weights and biases and copy
990 if (rnnType == kLayerRNN) {
991 auto * recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
992 recurrentLayer->Initialize();
993 // Add same layer to fNet
994 if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
995 }
996 else if (rnnType == kLayerLSTM ) {
997 auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
998 recurrentLayer->Initialize();
999 // Add same layer to fNet
1000 if (fBuildNet)
1001 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1002 }
1003 else if (rnnType == kLayerGRU) {
1004 if (Architecture_t::IsCudnn()) resetGateAfter = true; // needed for Cudnn
1005 auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1006 recurrentLayer->Initialize();
1007 // Add same layer to fNet
1008 if (fBuildNet)
1009 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1010 }
1011 else {
1012 Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
1013 }
1014}
1015
1016////////////////////////////////////////////////////////////////////////////////
1017/// Standard constructor.
/// Forwards job name, title, data set and option string to MethodBase and
/// initializes fInputShape as a vector of four zeros; fXInput is
/// default-constructed. All real setup happens later in option processing.
/// NOTE(review): the remaining member initializers (doc lines 1020-1023) were
/// elided by the hyperlinked listing.
1018MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
1019 : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1024 fXInput()
1025{
1026 // Nothing to do here
1027}
1028
1029////////////////////////////////////////////////////////////////////////////////
1030/// Constructor from a weight file.
/// Used when reading back a trained method; forwards the data set and weight
/// file path to MethodBase and initializes fInputShape as four zeros.
/// NOTE(review): the remaining member initializers (doc lines 1033-1036) were
/// elided by the hyperlinked listing.
1031MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
1032 : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1037 fXInput()
1038{
1039 // Nothing to do here
1040}
1041
1042////////////////////////////////////////////////////////////////////////////////
1043/// Destructor.
/// NOTE(review): the destructor's declaration line (doc line 1044) was elided
/// by the hyperlinked listing. Members (e.g. the fNet unique_ptr created in
/// TrainDeepNet) are released by their own destructors.
1045{
1046 // Nothing to do here
1047}
1048
1049////////////////////////////////////////////////////////////////////////////////
1050/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1051auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1052{
1053 // remove empty spaces
1054 parseString.ReplaceAll(" ","");
1055 KeyValueVector_t blockKeyValues;
1056 const TString keyValueDelim("=");
1057
1058 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1059 TIter nextBlock(blockStrings);
1060 TObjString *blockString = (TObjString *)nextBlock();
1061
1062 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1063 blockKeyValues.push_back(std::map<TString, TString>());
1064 std::map<TString, TString> &currentBlock = blockKeyValues.back();
1065
1066 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1067 TIter nextToken(subStrings);
1068 TObjString *token = (TObjString *)nextToken();
1069
1070 for (; token != nullptr; token = (TObjString *)nextToken()) {
1071 TString strKeyValue(token->GetString());
1072 int delimPos = strKeyValue.First(keyValueDelim.Data());
1073 if (delimPos <= 0) continue;
1074
1075 TString strKey = TString(strKeyValue(0, delimPos));
1076 strKey.ToUpper();
1077 TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1078
1079 strKey.Strip(TString::kBoth, ' ');
1080 strValue.Strip(TString::kBoth, ' ');
1081
1082 currentBlock.insert(std::make_pair(strKey, strValue));
1083 }
1084 }
1085 return blockKeyValues;
1086}
1087
1088////////////////////////////////////////////////////////////////////////////////
1089/// What kind of analysis type can handle the CNN
1090Bool_t MethodDL::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/)
1091{
1092 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1093 if (type == Types::kMulticlass) return kTRUE;
1094 if (type == Types::kRegression) return kTRUE;
1095
1096 return kFALSE;
1097}
1098
1099////////////////////////////////////////////////////////////////////////////////
1100/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1101/// 100 etc.
1102/// - 20% and 0.2 selects 20% of the training set as validation data.
1103/// - 100 selects 100 events as the validation data.
1104///
1105/// @return number of samples in validation set
1106///
/// NOTE(review): the declaration line (doc line 1107, function name and return
/// type) was elided by the hyperlinked listing.
1108{
1109 Int_t nValidationSamples = 0;
1110 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1111
1112 // Parsing + Validation
1113 // --------------------
1114 if (fNumValidationString.EndsWith("%")) {
1115 // Relative spec. format 20%
// intValStr is only used to validate the numeric part; the value itself is
// taken from fNumValidationString.Atof(), which stops at the trailing '%'.
1116 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1117
1118 if (intValStr.IsFloat()) {
1119 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
// implicit double -> Int_t conversion truncates toward zero
1120 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1121 } else {
1122 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1123 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1124 }
1125 } else if (fNumValidationString.IsFloat()) {
1126 Double_t valSizeAsDouble = fNumValidationString.Atof();
1127
1128 if (valSizeAsDouble < 1.0) {
1129 // Relative spec. format 0.2
1130 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1131 } else {
1132 // Absolute spec format 100 or 100.0
1133 nValidationSamples = valSizeAsDouble;
1134 }
1135 } else {
1136 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1137 << Endl;
1138 }
1139
1140 // Value validation
1141 // ----------------
// kFATAL aborts, so each check below terminates the job on failure.
1142 if (nValidationSamples < 0) {
1143 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1144 }
1145
1146 if (nValidationSamples == 0) {
1147 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1148 }
1149
1150 if (nValidationSamples >= (Int_t)trainingSetSize) {
1151 Log() << kFATAL << "Validation size \"" << fNumValidationString
1152 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1153 }
1154
1155 return nValidationSamples;
1156}
1157
1158
1159////////////////////////////////////////////////////////////////////////////////
1160/// Implementation of architecture specific train method
1161///
/// Runs one full training phase per TTrainingSettings block: validates the
/// batch/input layout, builds the master deepNet (and, in the first phase, the
/// batch-size-1 evaluation copy fNet), then iterates epochs of shuffled
/// forward/backward/optimizer steps, periodically evaluating the validation
/// loss and copying the best weights into fNet.
/// NOTE(review): the declaration line (doc line 1163) and the alias lines
/// 1167-1168 (presumably the Layer_t/DeepNet_t using-declarations) were elided
/// by the hyperlinked listing.
1162template <typename Architecture_t>
1164{
1165
1166 using Scalar_t = typename Architecture_t::Scalar_t;
1169 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1170
1171 bool debug = Log().GetMinType() == kDEBUG;
1172
1173
1174 // set the random seed for weight initialization
1175 Architecture_t::SetRandomSeed(fRandomSeed);
1176
1177 ///split training data in training and validation data
1178 // and determine the number of training and testing examples
1179
1180 size_t nValidationSamples = GetNumValidationSamples();
1181 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1182
// the validation set is the tail of the (already loaded) training collection
1183 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1184 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1185 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1186
1187 size_t trainingPhase = 1;
1188
1189 for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1190
1191 size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1192
1193
1194 // After the processing of the options, initialize the master deep net
1195 size_t batchSize = settings.batchSize;
1196 this->SetBatchSize(batchSize);
1197 // Should be replaced by actual implementation. No support for this now.
1198 size_t inputDepth = this->GetInputDepth();
1199 size_t inputHeight = this->GetInputHeight();
1200 size_t inputWidth = this->GetInputWidth();
1201 size_t batchDepth = this->GetBatchDepth();
1202 size_t batchHeight = this->GetBatchHeight();
1203 size_t batchWidth = this->GetBatchWidth();
1204 ELossFunction J = this->GetLossFunction();
// NOTE(review): doc line 1205 (presumably "EInitialization I = ...",
// used below when constructing the nets) was elided by the listing.
1206 ERegularization R = settings.regularization;
1207 EOptimizer O = settings.optimizer;
1208 Scalar_t weightDecay = settings.weightDecay;
1209
1210 //Batch size should be included in batch layout as well. There are two possibilities:
1211 // 1. Batch depth = batch size one will input tensorsa as (batch_size x d1 x d2)
1212 // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1213 // 2. Batch depth = 1, batch height = batch size batxch width = dim of input features
1214 // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1215
1216 if (batchDepth != batchSize && batchDepth > 1) {
1217 Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1218 return;
1219 }
1220 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1221 Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1222 return;
1223 }
1224
1225
1226 //check also that input layout compatible with batch layout
1227 bool badLayout = false;
1228 // case batch depth == batch size
1229 if (batchDepth == batchSize)
1230 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1231 // case batch Height is batch size
1232 if (batchHeight == batchSize && batchDepth == 1)
1233 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1234 if (badLayout) {
1235 Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1236 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1237 return;
1238 }
1239
1240 // check batch size is compatible with number of events
1241 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1242 Log() << kFATAL << "Number of samples in the datasets are train: ("
1243 << nTrainingSamples << ") test: (" << nValidationSamples
1244 << "). One of these is smaller than the batch size of "
1245 << settings.batchSize << ". Please increase the batch"
1246 << " size to be at least the same size as the smallest"
1247 << " of them." << Endl;
1248 }
1249
1250 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1251
1252 // create a copy of DeepNet for evaluating but with batch size = 1
1253 // fNet is the saved network and will be with CPU or Referrence architecture
1254 if (trainingPhase == 1) {
1255 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1256 batchHeight, batchWidth, J, I, R, weightDecay));
1257 fBuildNet = true;
1258 }
1259 else
1260 fBuildNet = false;
1261
1262 // Initialize the vector of slave nets
1263 std::vector<DeepNet_t> nets{};
1264 nets.reserve(nThreads);
1265 for (size_t i = 0; i < nThreads; i++) {
1266 // create a copies of the master deep net
1267 nets.push_back(deepNet);
1268 }
1269
1270
1271 // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1272 CreateDeepNet(deepNet, nets);
1273
1274
1275 // set droput probabilities
1276 // use convention to store in the layer 1.- dropout probabilities
1277 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1278 for (auto & p : dropoutVector) {
1279 p = 1.0 - p;
1280 }
1281 deepNet.SetDropoutProbabilities(dropoutVector);
1282
1283 if (trainingPhase > 1) {
1284 // copy initial weights from fNet to deepnet
1285 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1286 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1287 }
1288 }
1289
1290 // when fNet is built create also input matrix that will be used to evaluate it
1291 if (fBuildNet) {
1292 //int n1 = batchHeight;
1293 //int n2 = batchWidth;
1294 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1295 //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1296 //fXInput = TensorImpl_t(1,n1,n2);
// NOTE(review): doc line 1297 (presumably the default fXInput construction
// for the non-Dense case) was elided by the hyperlinked listing.
1298 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1299 fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1300 fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );
1301
1302
1303 // create pointer to output matrix used for the predictions
1304 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1305
1306 // print the created network
1307 Log() << "***** Deep Learning Network *****" << Endl;
1308 if (Log().GetMinType() <= kINFO)
1309 deepNet.Print();
1310 }
1311 Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1312
1313 // Loading the training and validation datasets
1314 TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1315 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1316 {inputDepth, inputHeight, inputWidth},
1317 {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1318 deepNet.GetOutputWidth(), nThreads);
1319
1320 TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1321 TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1322 {inputDepth, inputHeight, inputWidth},
1323 { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1324 deepNet.GetOutputWidth(), nThreads);
1325
1326
1327
1328 // do an evaluation of the network to compute initial minimum test error
1329
1330 Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1331
1332 Double_t minValError = 0.0;
1333 Log() << "Compute initial loss on the validation data " << Endl;
1334 for (auto batch : validationData) {
1335 auto inputTensor = batch.GetInput();
1336 auto outputMatrix = batch.GetOutput();
1337 auto weights = batch.GetWeights();
1338
1339 //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1340 // should we apply droput to the loss ??
1341 minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1342 }
1343 // add Regularization term
1344 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
// average over the number of validation batches; the size_t division means a
// partial trailing batch is not counted
1345 minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1346 minValError += regzTerm;
1347
1348
1349 // create a pointer to base class VOptimizer
1350 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1351
1352 // initialize the base class pointer with the corresponding derived class object.
1353 switch (O) {
1354
1355 case EOptimizer::kSGD:
1356 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1357 new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1358 break;
1359
1360 case EOptimizer::kAdam: {
1361 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
// NOTE(review): doc line 1362 (the "new DNN::TAdam<...>(" line) was elided
// by the hyperlinked listing.
1363 deepNet, settings.learningRate, settings.optimizerParams["ADAM_beta1"],
1364 settings.optimizerParams["ADAM_beta2"], settings.optimizerParams["ADAM_eps"]));
1365 break;
1366 }
1367
// NOTE(review): doc line 1368 (the "case EOptimizer::kAdagrad:" label) was
// elided by the hyperlinked listing.
1369 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1370 new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1371 settings.optimizerParams["ADAGRAD_eps"]));
1372 break;
1373
// NOTE(review): doc line 1374 (the "case EOptimizer::kRMSProp:" label) was
// elided by the hyperlinked listing.
1375 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1376 new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum,
1377 settings.optimizerParams["RMSPROP_rho"],
1378 settings.optimizerParams["RMSPROP_eps"]));
1379 break;
1380
// NOTE(review): doc line 1381 (the "case EOptimizer::kAdadelta:" label) was
// elided by the hyperlinked listing.
1382 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1383 new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1384 settings.optimizerParams["ADADELTA_rho"],
1385 settings.optimizerParams["ADADELTA_eps"]));
1386 break;
1387 }
1388
1389
1390 // Initialize the vector of batches, one batch for one slave network
1391 std::vector<TTensorBatch<Architecture_t>> batches{};
1392
1393 bool converged = false;
1394 size_t convergenceCount = 0;
1395 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1396
1397 // start measuring
1398 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1399 tstart = std::chrono::system_clock::now();
1400
1401 // function building string with optimizer parameters values for logging
1402 auto optimParametersString = [&]() {
1403 TString optimParameters;
1404 for ( auto & element : settings.optimizerParams) {
1405 TString key = element.first;
1406 key.ReplaceAll(settings.optimizerName + "_", ""); // strip optimizerName_
1407 double value = element.second;
1408 if (!optimParameters.IsNull())
1409 optimParameters += ",";
1410 else
1411 optimParameters += " (";
1412 optimParameters += TString::Format("%s=%g", key.Data(), value);
1413 }
1414 if (!optimParameters.IsNull())
1415 optimParameters += ")";
1416 return optimParameters;
1417 };
1418
1419 Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1420 << " Optimizer " << settings.optimizerName
1421 << optimParametersString()
1422 << " Learning rate = " << settings.learningRate << " regularization " << (char)settings.regularization
1423 << " minimum error = " << minValError << Endl;
1424 if (!fInteractive) {
1425 std::string separator(62, '-');
1426 Log() << separator << Endl;
1427 Log() << std::setw(10) << "Epoch"
1428 << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err." << std::setw(12)
1429 << "t(s)/epoch" << std::setw(12) << "t(s)/Loss" << std::setw(12) << "nEvents/s" << std::setw(12)
1430 << "Conv. Steps" << Endl;
1431 Log() << separator << Endl;
1432 }
1433
1434 // set up generator for shuffling the batches
1435 // if seed is zero we have always a different order in the batches
1436 size_t shuffleSeed = 0;
1437 if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1438 RandomGenerator<TRandom3> rng(shuffleSeed);
1439
1440 // print weights before
1441 if (fBuildNet && debug) {
1442 Log() << "Initial Deep Net Weights " << Endl;
1443 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1444 for (size_t l = 0; l < weights_tensor.size(); ++l)
1445 weights_tensor[l].Print();
1446 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1447 bias_tensor[0].Print();
1448 }
1449
1450 Log() << " Start epoch iteration ..." << Endl;
1451 bool debugFirstEpoch = false;
1452 bool computeLossInTraining = true; // compute loss in training or at test time
1453 size_t nTrainEpochs = 0;
1454 while (!converged) {
1455 nTrainEpochs++;
1456 trainingData.Shuffle(rng);
1457
1458 // execute all epochs
1459 //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1460
1461 Double_t trainingError = 0;
1462 for (size_t i = 0; i < batchesInEpoch; ++i ) {
1463 // Clean and load new batches, one batch for one slave net
1464 //batches.clear();
1465 //batches.reserve(nThreads);
1466 //for (size_t j = 0; j < nThreads; j++) {
1467 // batches.push_back(trainingData.GetTensorBatch());
1468 //}
1469 if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1470
1471 auto my_batch = trainingData.GetTensorBatch();
1472
1473 if (debugFirstEpoch)
1474 std::cout << "got batch data - doing forward \n";
1475
1476#ifdef DEBUG
1477
1478 Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1479 typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1480 typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1481 Architecture_t::PrintTensor(tOut,"label tensor",true) ;
1482 Architecture_t::PrintTensor(tW,"weight tensor",true) ;
1483#endif
1484
// forward pass with training=true (dropout etc. active)
1485 deepNet.Forward(my_batch.GetInput(), true);
1486 // compute also loss
1487 if (computeLossInTraining) {
1488 auto outputMatrix = my_batch.GetOutput();
1489 auto weights = my_batch.GetWeights();
1490 trainingError += deepNet.Loss(outputMatrix, weights, false);
1491 }
1492
1493 if (debugFirstEpoch)
1494 std::cout << "- doing backward \n";
1495
1496#ifdef DEBUG
1497 size_t nlayers = deepNet.GetLayers().size();
1498 for (size_t l = 0; l < nlayers; ++l) {
1499 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1500 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1501 TString::Format("initial weights layer %d", l).Data());
1502
1503 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1504 TString::Format("output tensor layer %d", l).Data());
1505 }
1506#endif
1507
1508 //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1509
1510 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1511
1512 if (debugFirstEpoch)
1513 std::cout << "- doing optimizer update \n";
1514
1515 // increment optimizer step that is used in some algorithms (e.g. ADAM)
1516 optimizer->IncrementGlobalStep();
1517 optimizer->Step();
1518
1519#ifdef DEBUG
1520 std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1521 for (size_t l = 0; l < nlayers; ++l) {
1522 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1523 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1524 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1525 }
1526 }
1527#endif
1528
1529 }
1530
1531 if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
1532 //}
1533 debugFirstEpoch = false;
// every settings.testInterval epochs: evaluate on the validation set,
// update convergence bookkeeping and (on improvement) save weights to fNet
1534 if ((nTrainEpochs % settings.testInterval) == 0) {
1535
1536 std::chrono::time_point<std::chrono::system_clock> t1,t2;
1537
1538 t1 = std::chrono::system_clock::now();
1539
1540 // Compute validation error.
1541
1542
1543 Double_t valError = 0.0;
1544 bool inTraining = false;
1545 for (auto batch : validationData) {
1546 auto inputTensor = batch.GetInput();
1547 auto outputMatrix = batch.GetOutput();
1548 auto weights = batch.GetWeights();
1549 // should we apply droput to the loss ??
1550 valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1551 }
1552 // normalize loss to number of batches and add regularization term
1553 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1554 valError /= (Double_t)(nValidationSamples / settings.batchSize);
1555 valError += regTerm;
1556
1557 //Log the loss value
1558 fTrainHistory.AddValue("valError",nTrainEpochs,valError);
1559
1560 t2 = std::chrono::system_clock::now();
1561
1562 // checking for convergence
1563 if (valError < minValError) {
1564 convergenceCount = 0;
1565 } else {
1566 convergenceCount += settings.testInterval;
1567 }
1568
1569 // copy configuration when reached a minimum error
1570 if (valError < minValError ) {
1571 // Copy weights from deepNet to fNet
1572 Log() << std::setw(10) << nTrainEpochs
1573 << " Minimum Test error found - save the configuration " << Endl;
1574 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1575 fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1576 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1577 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1578 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1579 // }
1580 }
1581 // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1582 // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1583
1584 minValError = valError;
1585 }
// if the initial reference error was non-positive, adopt the first
// measured validation error as the baseline
1586 else if ( minValError <= 0. )
1587 minValError = valError;
1588
1589 if (!computeLossInTraining) {
1590 trainingError = 0.0;
1591 // Compute training error.
1592 for (auto batch : trainingData) {
1593 auto inputTensor = batch.GetInput();
1594 auto outputMatrix = batch.GetOutput();
1595 auto weights = batch.GetWeights();
1596 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1597 }
1598 }
1599 // normalize loss to number of batches and add regularization term
1600 trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1601 trainingError += regTerm;
1602
1603 //Log the loss value
1604 fTrainHistory.AddValue("trainingError",nTrainEpochs,trainingError);
1605
1606 // stop measuring
1607 tend = std::chrono::system_clock::now();
1608
1609 // Compute numerical throughput.
1610 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1611 std::chrono::duration<double> elapsed1 = t1-tstart;
1612 // std::chrono::duration<double> elapsed2 = t2-tstart;
1613 // time to compute training and test errors
1614 std::chrono::duration<double> elapsed_testing = tend-t1;
1615
1616 double seconds = elapsed_seconds.count();
1617 // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1618 // nGFlops *= deepnet.GetNFlops() * 1e-9;
1619 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1620
1621 converged =
1622 convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1623
1624
1625 Log() << std::setw(10) << nTrainEpochs << " | "
1626 << std::setw(12) << trainingError
1627 << std::setw(12) << valError
1628 << std::setw(12) << seconds / settings.testInterval
1629 << std::setw(12) << elapsed_testing.count()
1630 << std::setw(12) << 1. / eventTime
1631 << std::setw(12) << convergenceCount
1632 << Endl;
1633
1634 if (converged) {
1635 Log() << Endl;
1636 }
1637 tstart = std::chrono::system_clock::now();
1638 }
1639
1640 // if (stepCount % 10 == 0 || converged) {
1641 if (converged && debug) {
1642 Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
1643 << Endl;
1644 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1645 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1646 for (size_t l = 0; l < weights_tensor.size(); ++l)
1647 weights_tensor[l].Print();
1648 bias_tensor[0].Print();
1649 }
1650
1651 }
1652
1653 trainingPhase++;
1654 } // end loop on training Phase
1655}
1656
1657////////////////////////////////////////////////////////////////////////////////
/// Dispatch training to the architecture-specific TrainDeepNet instantiation
/// selected by GetArchitectureString() ("GPU" or "CPU"); aborts with kFATAL for
/// unsupported architectures or interactive mode.
/// NOTE(review): the declaration line (doc line 1658, presumably
/// "void MethodDL::Train()") was elided by the hyperlinked listing.
1659{
1660 if (fInteractive) {
1661 Log() << kFATAL << "Not implemented yet" << Endl;
1662 return;
1663 }
1664
1665 // using for training same scalar type defined for the prediction
1666 if (this->GetArchitectureString() == "GPU") {
1667#ifdef R__HAS_TMVAGPU
1668 Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1669#ifdef R__HAS_CUDNN
// NOTE(review): doc lines 1670 and 1672 (the TrainDeepNet<TCudnn<...>> and
// TrainDeepNet<TCuda<...>> call lines) were elided by the hyperlinked listing.
1671#else
1673#endif
1674#else
1675 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1676 "you have CUDA installed and it was successfully "
1677 "detected by CMAKE."
1678 << Endl;
1679 return;
1680#endif
1681 } else if (this->GetArchitectureString() == "CPU") {
1682#ifdef R__HAS_TMVACPU
1683 // note that number of threads used for BLAS might be different
1684 // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1685 Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
1686 << gConfig().GetNCpu() << Endl << Endl;
1687#else
1688 Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
1689 << Endl;
1690#endif
// NOTE(review): doc line 1691 (the CPU TrainDeepNet<...> call line) was
// elided by the hyperlinked listing.
1692 return;
1693 }
1694 else {
1695 Log() << kFATAL << this->GetArchitectureString() <<
1696 " is not a supported architecture for TMVA::MethodDL"
1697 << Endl;
1698 }
1699
1700}
1701
1702////////////////////////////////////////////////////////////////////////////////
/// Copy the current TMVA event's variable values into fXInput, honouring the
/// tensor memory layout, then transfer the host buffer to the device tensor.
/// NOTE(review): the declaration line (doc line 1703, presumably
/// "void MethodDL::FillInputTensor()") was elided by the hyperlinked listing.
1704{
1705 // fill the input tensor fXInput from the current Event data
1706 // with the correct shape depending on the model used
1707 // The input tensor is used for network prediction after training
1708 // using a single event. The network batch size must be equal to 1.
1709 // The architecture specified at compile time in ArchitectureImpl_t
1710 // is used. This should be the CPU architecture
1711
1712 if (!fNet || fNet->GetDepth() == 0) {
1713 Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
1714 }
1715 if (fNet->GetBatchSize() != 1) {
1716 Log() << kFATAL << "FillINputTensor::Network batch size must be equal to 1 when doing single event predicition" << Endl;
1717 }
1718
1719 // get current event
1720 const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1721 size_t nVariables = GetEvent()->GetNVariables();
1722
1723 // for Columnlayout tensor memory layout is HWC while for rowwise is CHW
1724 if (fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
1725 R__ASSERT(fXInput.GetShape().size() < 4);
1726 size_t nc, nhw = 0;
1727 if (fXInput.GetShape().size() == 2) {
1728 nc = fXInput.GetShape()[0];
1729 if (nc != 1) {
// NOTE(review): doc line 1730 (a statement preceding this fatal log,
// likely printing the offending shape) was elided by the listing.
1731 Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1" << Endl;
1732 }
1733 nhw = fXInput.GetShape()[1];
1734 } else {
1735 nc = fXInput.GetCSize();
1736 nhw = fXInput.GetWSize();
1737 }
1738 if (nVariables != nc * nhw) {
1739 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1740 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw << Endl;
1741 }
1742 for (size_t j = 0; j < nc; j++) {
1743 for (size_t k = 0; k < nhw; k++) {
1744 // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
1745 fXInputBuffer[k * nc + j] = inputValues[j * nhw + k]; // for column layout !!!
1746 }
1747 }
1748 } else {
1749 // row-wise layout
1750 assert(fXInput.GetShape().size() >= 4);
1751 size_t nc = fXInput.GetCSize();
1752 size_t nh = fXInput.GetHSize();
1753 size_t nw = fXInput.GetWSize();
1754 size_t n = nc * nh * nw;
1755 if (nVariables != n) {
1756 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1757 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
1758 << Endl;
1759 }
1760 for (size_t j = 0; j < n; j++) {
1761 // in this case TMVA event has same order as input tensor
1762 fXInputBuffer[j] = inputValues[j]; // row-wise layout: direct 1:1 copy
1763 }
1764 }
1765 // copy buffer in input
1766 fXInput.GetDeviceBuffer().CopyFrom(fXInputBuffer);
1767 return;
1768}
1769
1770////////////////////////////////////////////////////////////////////////////////
/// Evaluate the trained network on the current event and return the MVA value
/// (element (0,0) of the prediction matrix); NaN outputs map to -999.
/// The error arguments are unused.
1771Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1772{
1773
// NOTE(review): doc line 1774 was elided by the hyperlinked listing —
// presumably the call that fills fXInput from the current event
// (FillInputTensor()); confirm against the repository source.
1775
1776 // perform the prediction
1777 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1778
1779 // return value
1780 double mvaValue = (*fYHat)(0, 0);
1781
1782 // for debugging
1783#ifdef DEBUG_MVAVALUE
1784 using Tensor_t = std::vector<MatrixImpl_t>;
1785 TMatrixF xInput(n1,n2, inputValues.data() );
1786 std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1787 xInput.Print();
1788 std::cout << "Output of DeepNet " << mvaValue << std::endl;
1789 auto & deepnet = *fNet;
1790 std::cout << "Loop on layers " << std::endl;
1791 for (int l = 0; l < deepnet.GetDepth(); ++l) {
1792 std::cout << "Layer " << l;
1793 const auto * layer = deepnet.GetLayerAt(l);
1794 const Tensor_t & layer_output = layer->GetOutput();
1795 layer->Print();
1796 std::cout << "DNN output " << layer_output.size() << std::endl;
1797 for (size_t i = 0; i < layer_output.size(); ++i) {
1798#ifdef R__HAS_TMVAGPU
1799 //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1800 TMatrixD m = layer_output[i];
1801#else
1802 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1803#endif
1804 m.Print();
1805 }
1806 const Tensor_t & layer_weights = layer->GetWeights();
1807 std::cout << "DNN weights " << layer_weights.size() << std::endl;
1808 if (layer_weights.size() > 0) {
1809 int i = 0;
1810#ifdef R__HAS_TMVAGPU
1811 TMatrixD m = layer_weights[i];
1812// TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1813#else
1814 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1815#endif
1816 m.Print();
1817 }
1818 }
1819#endif
1820
1821 return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1822}
////////////////////////////////////////////////////////////////////////////////
/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
////////////////////////////////////////////////////////////////////////////////
template <typename Architecture_t>
std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
{

   // Check whether the model is setup
   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built"
            << Endl;
   }

   // rebuild the networks: the evaluation net mirrors fNet's configuration but
   // is instantiated for the backend selected by the Architecture_t template
   // parameter (CPU / CUDA / cuDNN) and for the requested batch size
   this->SetBatchSize(batchSize);
   size_t inputDepth = this->GetInputDepth();
   size_t inputHeight = this->GetInputHeight();
   size_t inputWidth = this->GetInputWidth();
   size_t batchDepth = this->GetBatchDepth();
   size_t batchHeight = this->GetBatchHeight();
   size_t batchWidth = this->GetBatchWidth();
   ELossFunction J = fNet->GetLossFunction();
   EInitialization I = fNet->GetInitialization();
   ERegularization R = fNet->GetRegularization();
   Double_t weightDecay = fNet->GetWeightDecay();

   using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;

   // create the deep neural network
   DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
   std::vector<DeepNet_t> nets{};
   fBuildNet = false;
   CreateDeepNet(deepNet,nets);

   // copy weights from the saved fNet to the built DeepNet
   for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
      deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
      // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
      //    Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
      //    Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
      // }
   }

   // expected input-tensor dimensions for one batch
   size_t n1 = deepNet.GetBatchHeight();
   size_t n2 = deepNet.GetBatchWidth();
   size_t n0 = deepNet.GetBatchSize();
   // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
   if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
      n1 = deepNet.GetBatchSize();
      n0 = 1;
   }
   //this->SetBatchDepth(n0);
   Long64_t nEvents = lastEvt - firstEvt;
   // the data loader streams TMVA events into batch tensors of shape {n0,n1,n2}
   TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
   TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);


   // Tensor_t xInput;
   // for (size_t i = 0; i < n0; ++i)
   //    xInput.emplace_back(Matrix_t(n1,n2));

   // create pointer to output matrix used for the predictions
   Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;


   // event loop: process full batches; the final partial batch (if any) is
   // evaluated one event at a time via GetMvaValue()
   std::vector<double> mvaValues(nEvents);


   for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {

      Long64_t ievt_end = ievt + batchSize;
      // case of batch prediction: a full batch fits in the remaining range
      if (ievt_end <= lastEvt) {

         // validate the event variable count against the network input shape
         // once, on the first batch only
         if (ievt == firstEvt) {
            Data()->SetCurrentEvent(ievt);
            size_t nVariables = GetEvent()->GetNVariables();

            if (n1 == batchSize && n0 == 1) {
               if (n2 != nVariables) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
                        << Endl;
               }
            } else {
               if (n1*n2 != nVariables || n0 != batchSize) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
                        << Endl;
               }
            }
         }

         auto batch = testData.GetTensorBatch();
         auto inputTensor = batch.GetInput();

         auto xInput = batch.GetInput();
         // make the prediction
         deepNet.Prediction(yHat, xInput, fOutputFunction);
         // NaN outputs are mapped to the sentinel value -999.
         for (size_t i = 0; i < batchSize; ++i) {
            double value = yHat(i,0);
            mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
         }
      }
      else {
         // case of remaining events: compute prediction by single event !
         for (Long64_t i = ievt; i < lastEvt; ++i) {
            Data()->SetCurrentEvent(i);
            mvaValues[i] = GetMvaValue();
         }
      }
   }

   if (logProgress) {
      Log() << kINFO
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << " " << Endl;
   }

   return mvaValues;
}
1956
1957//////////////////////////////////////////////////////////////////////////
1958/// Get the regression output values for a single event
1959//////////////////////////////////////////////////////////////////////////
1960const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1961{
1962
1963 FillInputTensor ();
1964
1965 // perform the network prediction
1966 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1967
1968 size_t nTargets = DataInfo().GetNTargets();
1969 R__ASSERT(nTargets == fYHat->GetNcols());
1970
1971 std::vector<Float_t> output(nTargets);
1972 for (size_t i = 0; i < nTargets; i++)
1973 output[i] = (*fYHat)(0, i);
1974
1975 // ned to transform back output values
1976 if (fRegressionReturnVal == NULL)
1977 fRegressionReturnVal = new std::vector<Float_t>(nTargets);
1978 R__ASSERT(fRegressionReturnVal->size() == nTargets);
1979
1980 // N.B. one should cache here temporary event class
1981 Event *evT = new Event(*GetEvent());
1982 for (size_t i = 0; i < nTargets; ++i) {
1983 evT->SetTarget(i, output[i]);
1984 }
1985 const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
1986 for (size_t i = 0; i < nTargets; ++i) {
1987 (*fRegressionReturnVal)[i] = evT2->GetTarget(i);
1988 }
1989 delete evT;
1990 return *fRegressionReturnVal;
1991}
1992//////////////////////////////////////////////////////////////////////////
1993/// Get the multi-class output values for a single event
1994//////////////////////////////////////////////////////////////////////////
1995const std::vector<Float_t> &TMVA::MethodDL::GetMulticlassValues()
1996{
1997
1998 FillInputTensor();
1999
2000 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
2001
2002 size_t nClasses = DataInfo().GetNClasses();
2003 R__ASSERT(nClasses == fYHat->GetNcols());
2004
2005 if (fMulticlassReturnVal == NULL) {
2006 fMulticlassReturnVal = new std::vector<Float_t>(nClasses);
2007 }
2008 R__ASSERT(fMulticlassReturnVal->size() == nClasses);
2009
2010 for (size_t i = 0; i < nClasses; i++) {
2011 (*fMulticlassReturnVal)[i] = (*fYHat)(0, i);
2012 }
2013 return *fMulticlassReturnVal;
2014}
2015
////////////////////////////////////////////////////////////////////////////////
/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
/// Here we will evaluate using a default batch size and the same architecture used for
/// Training
////////////////////////////////////////////////////////////////////////////////
std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{

   // clamp the requested event range to the available data set
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt-firstEvt;

   // use same batch size as for training (from first strategy);
   // for large input tensors fall back to a smaller default batch size
   size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
   size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
   if ( size_t(nEvents) < batchSize ) batchSize = nEvents;

   // using for training same scalar type defined for the prediction
   if (this->GetArchitectureString() == "GPU") {
#ifdef R__HAS_TMVAGPU
      Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
#ifdef R__HAS_CUDNN
      return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#else
      return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#endif

#endif
      // NOTE(review): when ROOT is built without GPU support (no
      // R__HAS_TMVAGPU) a "GPU" request silently falls through to the CPU
      // implementation below
   }
   Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
   return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
}
2049////////////////////////////////////////////////////////////////////////////////
2050void MethodDL::AddWeightsXMLTo(void * parent) const
2051{
2052 // Create the parent XML node with name "Weights"
2053 auto & xmlEngine = gTools().xmlengine();
2054 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2055
2056 /*! Get all necessary information, in order to be able to reconstruct the net
2057 * if we read the same XML file. */
2058
2059 // Deep Net specific info
2060 Int_t depth = fNet->GetDepth();
2061
2062 Int_t inputDepth = fNet->GetInputDepth();
2063 Int_t inputHeight = fNet->GetInputHeight();
2064 Int_t inputWidth = fNet->GetInputWidth();
2065
2066 Int_t batchSize = fNet->GetBatchSize();
2067
2068 Int_t batchDepth = fNet->GetBatchDepth();
2069 Int_t batchHeight = fNet->GetBatchHeight();
2070 Int_t batchWidth = fNet->GetBatchWidth();
2071
2072 char lossFunction = static_cast<char>(fNet->GetLossFunction());
2073 char initialization = static_cast<char>(fNet->GetInitialization());
2074 char regularization = static_cast<char>(fNet->GetRegularization());
2075
2076 Double_t weightDecay = fNet->GetWeightDecay();
2077
2078 // Method specific info (not sure these are needed)
2079 char outputFunction = static_cast<char>(this->GetOutputFunction());
2080 //char lossFunction = static_cast<char>(this->GetLossFunction());
2081
2082 // Add attributes to the parent node
2083 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2084
2085 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2086 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2087 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2088
2089 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2090 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2091 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2092 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2093
2094 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2095 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2096 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2097 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2098
2099 gTools().AddAttr(nn, "WeightDecay", weightDecay);
2100
2101
2102 for (Int_t i = 0; i < depth; i++)
2103 {
2104 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2105 }
2106
2107
2108}
2109
////////////////////////////////////////////////////////////////////////////////
/// Rebuild the deep network from the "Weights" XML node produced by
/// AddWeightsXMLTo: read the global network attributes, recreate fNet, then
/// add each layer by name and let it read back its own weights and biases.
/// (NOTE(review): the function signature line is lost in this rendering of
/// the file; the body reads the node pointer as `rootXML`.)
{

   auto netXML = gTools().GetChild(rootXML, "Weights");
   // fall back to the root node if there is no dedicated "Weights" child
   if (!netXML){
      netXML = rootXML;
   }

   size_t netDepth;
   gTools().ReadAttr(netXML, "NetDepth", netDepth);

   size_t inputDepth, inputHeight, inputWidth;
   gTools().ReadAttr(netXML, "InputDepth", inputDepth);
   gTools().ReadAttr(netXML, "InputHeight", inputHeight);
   gTools().ReadAttr(netXML, "InputWidth", inputWidth);

   size_t batchSize, batchDepth, batchHeight, batchWidth;
   gTools().ReadAttr(netXML, "BatchSize", batchSize);
   // use always batchsize = 1
   //batchSize = 1;
   gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
   gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
   gTools().ReadAttr(netXML, "BatchWidth", batchWidth);

   // enumerations were serialized as single characters (see AddWeightsXMLTo)
   char lossFunctionChar;
   gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
   char initializationChar;
   gTools().ReadAttr(netXML, "Initialization", initializationChar);
   char regularizationChar;
   gTools().ReadAttr(netXML, "Regularization", regularizationChar);
   char outputFunctionChar;
   gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
   double weightDecay;
   gTools().ReadAttr(netXML, "WeightDecay", weightDecay);

   // create the net

   // DeepNetCpu_t is defined in MethodDL.h
   this->SetInputDepth(inputDepth);
   this->SetInputHeight(inputHeight);
   this->SetInputWidth(inputWidth);
   this->SetBatchDepth(batchDepth);
   this->SetBatchHeight(batchHeight);
   this->SetBatchWidth(batchWidth);



   fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
                                                           batchHeight, batchWidth,
                                                           static_cast<ELossFunction>(lossFunctionChar),
                                                           static_cast<EInitialization>(initializationChar),
                                                           static_cast<ERegularization>(regularizationChar),
                                                           weightDecay));

   fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);


   //size_t previousWidth = inputWidth;
   auto layerXML = gTools().xmlengine().GetChild(netXML);

   // loop on the layers and add them to the network, dispatching on the
   // layer node name written by each layer's AddWeightsXMLTo
   for (size_t i = 0; i < netDepth; i++) {

      TString layerName = gTools().xmlengine().GetNodeName(layerXML);

      // case of dense layer
      if (layerName == "DenseLayer") {

         // read width and activation function and then we can create the layer
         size_t width = 0;
         gTools().ReadAttr(layerXML, "Width", width);

         // Read activation function.
         TString funcString;
         gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
         EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());


         fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability

      }
      // Convolutional Layer
      else if (layerName == "ConvLayer") {

         // read depth, filter geometry, strides and padding
         size_t depth = 0;
         gTools().ReadAttr(layerXML, "Depth", depth);
         size_t fltHeight, fltWidth = 0;
         size_t strideRows, strideCols = 0;
         size_t padHeight, padWidth = 0;
         gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
         gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
         gTools().ReadAttr(layerXML, "StrideRows", strideRows);
         gTools().ReadAttr(layerXML, "StrideCols", strideCols);
         gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
         gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);

         // Read activation function.
         TString funcString;
         gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
         EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());


         fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                            padHeight, padWidth, actFunction);

      }

      // MaxPool Layer
      else if (layerName == "MaxPoolLayer") {

         // read maxpool layer info
         size_t filterHeight, filterWidth = 0;
         size_t strideRows, strideCols = 0;
         gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
         gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
         gTools().ReadAttr(layerXML, "StrideRows", strideRows);
         gTools().ReadAttr(layerXML, "StrideCols", strideCols);

         fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
      }
      // Reshape Layer
      else if (layerName == "ReshapeLayer") {

         // read reshape layer info
         size_t depth, height, width = 0;
         gTools().ReadAttr(layerXML, "Depth", depth);
         gTools().ReadAttr(layerXML, "Height", height);
         gTools().ReadAttr(layerXML, "Width", width);
         int flattening = 0;
         gTools().ReadAttr(layerXML, "Flattening",flattening );

         fNet->AddReshapeLayer(depth, height, width, flattening);

      }
      // RNN Layer
      else if (layerName == "RNNLayer") {

         // read RNN layer info
         size_t stateSize,inputSize, timeSteps = 0;
         int rememberState= 0;
         int returnSequence = 0;
         gTools().ReadAttr(layerXML, "StateSize", stateSize);
         gTools().ReadAttr(layerXML, "InputSize", inputSize);
         gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
         gTools().ReadAttr(layerXML, "RememberState", rememberState );
         gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);

         fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);

      }
      // LSTM Layer
      else if (layerName == "LSTMLayer") {

         // read LSTM layer info
         size_t stateSize,inputSize, timeSteps = 0;
         int rememberState, returnSequence = 0;
         gTools().ReadAttr(layerXML, "StateSize", stateSize);
         gTools().ReadAttr(layerXML, "InputSize", inputSize);
         gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
         gTools().ReadAttr(layerXML, "RememberState", rememberState );
         gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);

         fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);

      }
      // GRU Layer
      else if (layerName == "GRULayer") {

         // read GRU layer info
         size_t stateSize,inputSize, timeSteps = 0;
         int rememberState, returnSequence, resetGateAfter = 0;
         gTools().ReadAttr(layerXML, "StateSize", stateSize);
         gTools().ReadAttr(layerXML, "InputSize", inputSize);
         gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
         gTools().ReadAttr(layerXML, "RememberState", rememberState );
         gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
         gTools().ReadAttr(layerXML, "ResetGateAfter", resetGateAfter);

         // cuDNN only implements the resetGateAfter=true GRU variant
         if (!resetGateAfter && ArchitectureImpl_t::IsCudnn())
            Warning("ReadWeightsFromXML",
                    "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");

         fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
      }
      // BatchNorm Layer
      else if (layerName == "BatchNormLayer") {
         // use some dummy values which will be overwritten in BatchNormLayer::ReadWeightsFromXML
         fNet->AddBatchNormLayer(0., 0.0);
      }
      // read weights and biases
      fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);

      // read next layer
      layerXML = gTools().GetNextChild(layerXML);
   }

   fBuildNet = false;
   // create now the input and output matrices
   //int n1 = batchHeight;
   //int n2 = batchWidth;
   // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
   //if (fXInput.size() > 0) fXInput.clear();
   //fXInput.emplace_back(MatrixImpl_t(n1,n2));
   // NOTE(review): the default (non-dense) creation of fXInput and of
   // fXInputBuffer appears to be missing in this rendering of the file —
   // confirm against the repository sources.
   if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
      // make here a ColumnMajor tensor
      fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth(),TMVA::Experimental::MemoryLayout::ColumnMajor );

   // create pointer to output matrix used for the predictions
   fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );


}
2326
2327
////////////////////////////////////////////////////////////////////////////////
/// Reading weights from a plain text stream is not supported by this method;
/// weights are persisted via XML instead (see AddWeightsXMLTo /
/// ReadWeightsFromXML). Intentionally a no-op.
void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
{
}
2332
////////////////////////////////////////////////////////////////////////////////
/// Compute a variable ranking for the trained network.
/// Not implemented yet: always returns NULL.
/// (NOTE(review): the function signature line is lost in this rendering of
/// the file.)
{
   // TODO
   return NULL;
}
2339
////////////////////////////////////////////////////////////////////////////////
/// Print a help message describing this method. Not implemented yet.
/// (NOTE(review): the function signature line is lost in this rendering of
/// the file.)
{
   // TODO
}
2345
2346} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define e(i)
Definition RSha256.hxx:103
int Int_t
Signed integer 4 bytes (int).
Definition RtypesCore.h:59
bool Bool_t
Boolean (0=false, 1=true) (bool).
Definition RtypesCore.h:77
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
TMatrixT< Double_t > TMatrixD
Definition TMatrixDfwd.h:23
TMatrixT< Float_t > TMatrixF
Definition TMatrixFfwd.h:23
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2496
The Formula class.
Definition TFormula.h:89
Double_t Eval(Args... args) const
Set first 1, 2, 3 or 4 variables (e.g.
Definition TFormula.h:326
UInt_t GetNCpu()
Definition Config.h:70
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
MsgLogger & Log() const
Adadelta Optimizer class.
Definition Adadelta.h:45
Adagrad Optimizer class.
Definition Adagrad.h:45
Adam Optimizer class.
Definition Adam.h:45
static void PrintTensor(const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false)
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition Cpu.h:108
static bool IsCudnn()
Definition Cpu.h:131
Generic Deep Neural Network class.
Definition DeepNet.h:73
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition DeepNet.h:825
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition DeepNet.h:608
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition DeepNet.h:740
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition DeepNet.h:567
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition DeepNet.h:485
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition DeepNet.h:439
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition DeepNet.h:773
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition DeepNet.h:524
Generic layer class.
Definition DenseLayer.h:59
RMSProp Optimizer class.
Definition RMSProp.h:45
Stochastic Batch Gradient Descent Optimizer class.
Definition SGD.h:46
Generic General Layer class.
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Class that contains all the data information.
Definition DataSetInfo.h:62
UInt_t GetNClasses() const
Types::ETreeType GetCurrentType() const
Definition DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:206
void SetCurrentEvent(Long64_t ievt) const
Definition DataSet.h:88
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition Event.cxx:367
UInt_t GetNVariables() const
accessor to the number of variables
Definition Event.cxx:316
UInt_t GetClass() const
Definition Event.h:86
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
const char * GetName() const override
Definition MethodBase.h:337
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition MethodBase.h:689
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
Definition MethodBase.h:349
const TString & GetMethodName() const
Definition MethodBase.h:334
const Event * GetEvent() const
Definition MethodBase.h:754
DataSetInfo & DataInfo() const
Definition MethodBase.h:413
UInt_t GetNVariables() const
Definition MethodBase.h:348
Types::EAnalysisType fAnalysisType
Definition MethodBase.h:598
UInt_t GetNvar() const
Definition MethodBase.h:347
TrainingHistory fTrainHistory
Definition MethodBase.h:428
DataSet * Data() const
Definition MethodBase.h:412
IPythonInteractive * fInteractive
Definition MethodBase.h:451
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition MethodDL.h:108
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition MethodDL.h:183
DNN::ELossFunction fLossFunction
The loss function.
Definition MethodDL.h:190
std::vector< size_t > fInputShape
Contains the batch size (no.
Definition MethodDL.h:178
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
Check the type of analysis the deep learning network can do.
TString fLayoutString
The string defining the layout of the deep net.
Definition MethodDL.h:194
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) override
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class Here we will evaluate...
void SetInputDepth(int inputDepth)
Setters.
Definition MethodDL.h:286
std::unique_ptr< MatrixImpl_t > fYHat
Definition MethodDL.h:208
size_t GetBatchHeight() const
Definition MethodDL.h:263
void ReadWeightsFromXML(void *wghtnode) override
TString fWeightInitializationString
The string defining the weight initialization method.
Definition MethodDL.h:197
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate max pool layer.
Definition MethodDL.cxx:767
TensorImpl_t fXInput
Definition MethodDL.h:206
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero).
Definition MethodDL.h:186
TString fArchitectureString
The string defining the architecture: CPU or GPU.
Definition MethodDL.h:198
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
void Train() override
Methods for training the deep learning network.
void TrainDeepNet()
train of deep neural network using the defined architecture
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition MethodDL.h:280
DNN::EOutputFunction GetOutputFunction() const
Definition MethodDL.h:269
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate dense layer.
Definition MethodDL.cxx:582
UInt_t GetNumValidationSamples()
parce the validation string and return the number of event data used for validation
TString GetBatchLayoutString() const
Definition MethodDL.h:273
void SetInputWidth(int inputWidth)
Definition MethodDL.h:288
HostBufferImpl_t fXInputBuffer
Definition MethodDL.h:207
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition MethodDL.h:184
size_t GetInputDepth() const
Definition MethodDL.h:255
std::unique_ptr< DeepNetImpl_t > fNet
Definition MethodDL.h:209
TString GetInputLayoutString() const
Definition MethodDL.h:272
void SetBatchHeight(size_t batchHeight)
Definition MethodDL.h:293
void GetHelpMessage() const override
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition MethodDL.h:93
size_t GetInputHeight() const
Definition MethodDL.h:256
TString GetArchitectureString() const
Definition MethodDL.h:278
void ParseBatchLayout()
Parse the input layout.
Definition MethodDL.cxx:481
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition MethodDL.cxx:889
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
const std::vector< Float_t > & GetRegressionValues() override
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition MethodDL.h:199
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition MethodDL.h:189
DNN::EInitialization fWeightInitialization
The initialization method.
Definition MethodDL.h:188
void AddWeightsXMLTo(void *parent) const override
size_t GetBatchDepth() const
Definition MethodDL.h:262
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate rnn layer.
Definition MethodDL.cxx:930
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition MethodDL.h:204
size_t GetInputWidth() const
Definition MethodDL.h:257
void SetInputShape(std::vector< size_t > inputShape)
Definition MethodDL.h:289
DNN::ELossFunction GetLossFunction() const
Definition MethodDL.h:270
TString fBatchLayoutString
The string defining the layout of the batch.
Definition MethodDL.h:193
void DeclareOptions() override
The option handling methods.
Definition MethodDL.cxx:166
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
Definition MethodDL.cxx:668
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
Definition MethodDL.cxx:828
TString fTrainingStrategyString
The string defining the training strategy.
Definition MethodDL.h:196
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition MethodDL.h:110
void SetBatchDepth(size_t batchDepth)
Definition MethodDL.h:292
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
void SetBatchWidth(size_t batchWidth)
Definition MethodDL.h:294
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
void ReadWeightsFromStream(std::istream &) override
DNN::EInitialization GetWeightInitialization() const
Definition MethodDL.h:268
void SetBatchSize(size_t batchSize)
Definition MethodDL.h:291
TString GetLayoutString() const
Definition MethodDL.h:274
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition MethodDL.h:182
void ProcessOptions() override
Definition MethodDL.cxx:218
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition MethodDL.h:106
void Init() override
default initializations
Definition MethodDL.cxx:431
size_t GetBatchWidth() const
Definition MethodDL.h:264
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition MethodDL.h:107
const std::vector< Float_t > & GetMulticlassValues() override
virtual ~MethodDL()
Virtual Destructor.
void ParseInputLayout()
Parse the input layout.
Definition MethodDL.cxx:438
void FillInputTensor()
Get the input event tensor for evaluation Internal function to fill the fXInput tensor with the corre...
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition MethodDL.h:201
const Ranking * CreateRanking() override
void SetInputHeight(int inputHeight)
Definition MethodDL.h:287
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options,...
Definition MethodDL.cxx:528
TString fErrorStrategy
The string defining the error strategy for training.
Definition MethodDL.h:195
TString fInputLayoutString
The string defining the layout of the input.
Definition MethodDL.h:192
EMsgType GetMinType() const
Definition MsgLogger.h:69
Ranking for variables in method (implementation).
Definition Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition Timer.cxx:145
TXMLEngine & xmlengine()
Definition Tools.h:262
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1125
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1137
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kMulticlass
Definition Types.h:129
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
@ kFATAL
Definition Types.h:61
void Print(Option_t *name="") const override
Print the matrix as a table of elements.
void Print(Option_t *option="") const override
Print TNamed name and title.
Definition TNamed.cxx:127
An array of TObjects.
Definition TObjArray.h:31
Collectable string class.
Definition TObjString.h:28
const TString & GetString() const
Definition TObjString.h:46
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition TObject.cxx:1084
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition TObject.cxx:1098
Basic string class.
Definition TString.h:138
Ssiz_t Length() const
Definition TString.h:425
Int_t Atoi() const
Return integer value of string.
Definition TString.cxx:1994
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition TString.cxx:1170
Double_t Atof() const
Return floating-point value contained in string.
Definition TString.cxx:2060
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition TString.cxx:1864
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition TString.cxx:545
const char * Data() const
Definition TString.h:384
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition TString.h:713
@ kTrailing
Definition TString.h:284
@ kBoth
Definition TString.h:284
void ToUpper()
Change string to upper case.
Definition TString.cxx:1202
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition TString.cxx:2270
Bool_t IsNull() const
Definition TString.h:422
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2385
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
const Int_t n
Definition legend1.C:16
#define I(x, y, z)
EOptimizer
Enum representing the optimizer used for training.
Definition Functions.h:82
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition DataLoader.h:39
EOutputFunction
Enum that represents output functions.
Definition Functions.h:46
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition Functions.h:238
ERegularization
Enum representing the regularization type applied for a given layer.
Definition Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition Functions.h:57
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition MethodDL.cxx:74
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Bool_t IsNaN(Double_t x)
Definition TMath.h:903
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition TMath.h:767
All of the options that can be specified in the training string.
Definition MethodDL.h:72
std::map< TString, double > optimizerParams
Definition MethodDL.h:84
DNN::EOptimizer optimizer
Definition MethodDL.h:78
DNN::ERegularization regularization
Definition MethodDL.h:77
std::vector< Double_t > dropoutProbabilities
Definition MethodDL.h:83
TMarker m
Definition textangle.C:8
TLine l
Definition textangle.C:4
auto * t1
Definition textangle.C:20