Logo ROOT   6.18/05
Reference Guide
MethodDL.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/cnn:$Id$Ndl
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * Web : http://tmva.sourceforge.net *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31
32#include "TMVA/Tools.h"
33#include "TMVA/Configurable.h"
34#include "TMVA/IMethod.h"
36#include "TMVA/MethodDL.h"
37#include "TMVA/Types.h"
39#include "TMVA/DNN/Functions.h"
41#include "TMVA/DNN/SGD.h"
42#include "TMVA/DNN/Adam.h"
43#include "TMVA/DNN/Adagrad.h"
44#include "TMVA/DNN/RMSProp.h"
45#include "TMVA/DNN/Adadelta.h"
46#include "TMVA/Timer.h"
47
48#include "TStopwatch.h"
49
50#include <chrono>
51
54
55using namespace TMVA::DNN::CNN;
56using namespace TMVA::DNN;
57
63
64
65namespace TMVA {
66
67
68////////////////////////////////////////////////////////////////////////////////
69TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
70{
71 key.ToUpper();
72 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
73 if (it == keyValueMap.end()) {
74 return TString("");
75 }
76 return it->second;
77}
78
////////////////////////////////////////////////////////////////////////////////
/// Forward declaration of the typed option fetcher. The explicit
/// specializations below parse the raw option string into the requested
/// type T, returning `defaultValue` when the key is absent.
template <typename T>
T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
82
83////////////////////////////////////////////////////////////////////////////////
84template <>
85int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
86{
87 TString value(fetchValueTmp(keyValueMap, key));
88 if (value == "") {
89 return defaultValue;
90 }
91 return value.Atoi();
92}
93
94////////////////////////////////////////////////////////////////////////////////
95template <>
96double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
97{
98 TString value(fetchValueTmp(keyValueMap, key));
99 if (value == "") {
100 return defaultValue;
101 }
102 return value.Atof();
103}
104
105////////////////////////////////////////////////////////////////////////////////
106template <>
107TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
108{
109 TString value(fetchValueTmp(keyValueMap, key));
110 if (value == "") {
111 return defaultValue;
112 }
113 return value;
114}
115
116////////////////////////////////////////////////////////////////////////////////
117template <>
118bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
119{
120 TString value(fetchValueTmp(keyValueMap, key));
121 if (value == "") {
122 return defaultValue;
123 }
124
125 value.ToUpper();
126 if (value == "TRUE" || value == "T" || value == "1") {
127 return true;
128 }
129
130 return false;
131}
132
133////////////////////////////////////////////////////////////////////////////////
134template <>
135std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
136 std::vector<double> defaultValue)
137{
138 TString parseString(fetchValueTmp(keyValueMap, key));
139 if (parseString == "") {
140 return defaultValue;
141 }
142
143 parseString.ToUpper();
144 std::vector<double> values;
145
146 const TString tokenDelim("+");
147 TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
148 TIter nextToken(tokenStrings);
149 TObjString *tokenString = (TObjString *)nextToken();
150 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
151 std::stringstream sstr;
152 double currentValue;
153 sstr << tokenString->GetString().Data();
154 sstr >> currentValue;
155 values.push_back(currentValue);
156 }
157 return values;
158}
159
////////////////////////////////////////////////////////////////////////////////
/// Declare the configurable options of the DL method and their defaults.
/// NOTE(review): the signature line (void MethodDL::DeclareOptions()) was a
/// hyperlink in the HTML export and is missing from this extraction.
{
   // Set default values for all option strings

   // Input tensor shape as "depth|height|width"; "0|0|0" means "deduce later"
   DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");

   DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");

   // Comma-separated layer specs; each layer's fields are '|'-separated
   DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");

   DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
                                                                      " or cross entropy (binary classification).");
   AddPreDefVal(TString("CROSSENTROPY"));
   AddPreDefVal(TString("SUMOFSQUARES"));
   AddPreDefVal(TString("MUTUALEXCLUSIVE"));

   DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
   AddPreDefVal(TString("XAVIER"));
   AddPreDefVal(TString("XAVIERUNIFORM"));

   DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");

   DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
                    "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
                    "Specify as 100 to use exactly 100 events. (Default: 20%)");

   DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
   AddPreDefVal(TString("STANDARD"));
   AddPreDefVal(TString("CPU"));
   AddPreDefVal(TString("GPU"));
   AddPreDefVal(TString("OPENCL"));

   // define training strategy blocks separated by a separator "|";
   // keys inside a block are parsed by ParseKeyValueString/fetchValueTmp
   DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
                                              "Momentum=0.3,"
                                              "Repetitions=3,"
                                              "ConvergenceSteps=50,"
                                              "BatchSize=30,"
                                              "TestRepetitions=7,"
                                              "WeightDecay=0.0,"
                                              "Regularization=None,"
                                              "DropConfig=0.0,"
                                              "DropRepetitions=5"
                                              "|"
                                              "LearningRate=1e-4,"
                                              "Momentum=0.3,"
                                              "Repetitions=3,"
                                              "ConvergenceSteps=50,"
                                              "MaxEpochs=2000,"
                                              "BatchSize=20,"
                                              "TestRepetitions=7,"
                                              "WeightDecay=0.001,"
                                              "Regularization=L2,"
                                              "DropConfig=0.0+0.5+0.5,"
                                              "DropRepetitions=5,"
                                              "Multithreading=True",
                                              "TrainingStrategy", "Defines the training strategies.");
}
219
220////////////////////////////////////////////////////////////////////////////////
222{
223
225 Log() << kINFO << "Will ignore negative events in training!" << Endl;
226 }
227
228 if (fArchitectureString == "STANDARD") {
229 Log() << kINFO << "The STANDARD architecture has been deprecated. "
230 "Please use Architecture=CPU or Architecture=CPU."
231 "See the TMVA Users' Guide for instructions if you "
232 "encounter problems."
233 << Endl;
234 }
235 if (fArchitectureString == "OPENCL") {
236 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
237 "Please use Architecture=CPU or Architecture=CPU for the "
238 "time being. See the TMVA Users' Guide for instructions "
239 "if you encounter problems."
240 << Endl;
241 }
242
243 // the architecture can now be set at runtime as an option
244
245
246 if (fArchitectureString == "GPU") {
247#ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
248 Log() << kERROR << "CUDA backend not enabled. Please make sure "
249 "you have CUDA installed and it was successfully "
250 "detected by CMAKE by using -Dcuda=On "
251 << Endl;
252#ifdef R__HAS_TMVACPU
253 fArchitectureString = "CPU";
254 Log() << kINFO << "Will use now the CPU architecture !" << Endl;
255#else
256 fArchitectureString = "Standard";
257 Log() << kINFO << "Will use now the Standard architecture !" << Endl;
258#endif
259#else
260 Log() << kINFO << "Will use now the GPU architecture !" << Endl;
261#endif
262 }
263
264 else if (fArchitectureString == "CPU") {
265#ifndef R__HAS_TMVACPU // TMVA has no CPU support
266 Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
267 "you have a BLAS implementation and it was successfully "
268 "detected by CMake as well that the imt CMake flag is set."
269 << Endl;
270#ifdef R__HAS_TMVAGPU
271 fArchitectureString = "GPU";
272 Log() << kINFO << "Will use now the GPU architecture !" << Endl;
273#else
274 fArchitectureString = "STANDARD";
275 Log() << kINFO << "Will use now the Standard architecture !" << Endl;
276#endif
277#else
278 Log() << kINFO << "Will use now the CPU architecture !" << Endl;
279#endif
280 }
281
282 else {
283 Log() << kINFO << "Will use the deprecated STANDARD architecture !" << Endl;
284 fArchitectureString = "STANDARD";
285 }
286
287 // Input Layout
290
291 // Loss function and output.
294 if (fErrorStrategy == "SUMOFSQUARES") {
295 fLossFunction = ELossFunction::kMeanSquaredError;
296 }
297 if (fErrorStrategy == "CROSSENTROPY") {
299 }
301 } else if (fAnalysisType == Types::kRegression) {
302 if (fErrorStrategy != "SUMOFSQUARES") {
303 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
304 << " neural net error function. Setting error function to "
305 << " SUMOFSQUARES now." << Endl;
306 }
307
308 fLossFunction = ELossFunction::kMeanSquaredError;
310 } else if (fAnalysisType == Types::kMulticlass) {
311 if (fErrorStrategy == "SUMOFSQUARES") {
312 fLossFunction = ELossFunction::kMeanSquaredError;
313 }
314 if (fErrorStrategy == "CROSSENTROPY") {
316 }
317 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
318 fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
319 }
321 }
322
323 // Initialization
324 // the biases will be always initialized to zero
325 if (fWeightInitializationString == "XAVIER") {
327 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
329 } else if (fWeightInitializationString == "GAUSS") {
331 } else if (fWeightInitializationString == "UNIFORM") {
333 } else if (fWeightInitializationString == "ZERO") {
335 } else {
337 }
338
339 // Training settings.
340
342 for (auto &block : strategyKeyValues) {
343 TTrainingSettings settings;
344
345 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
346 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
347 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
348 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
349 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
350 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
351 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
352 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
353
354 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
355 if (regularization == "L1") {
357 } else if (regularization == "L2") {
359 } else {
361 }
362
363 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
364 settings.optimizerName = optimizer;
365 if (optimizer == "SGD") {
367 } else if (optimizer == "ADAM") {
369 } else if (optimizer == "ADAGRAD") {
371 } else if (optimizer == "RMSPROP") {
373 } else if (optimizer == "ADADELTA") {
375 } else {
376 // Make Adam as default choice if the input string is
377 // incorrect.
379 settings.optimizerName = "ADAM";
380 }
381
382
383 TString strMultithreading = fetchValueTmp(block, "Multithreading", TString("True"));
384
385 if (strMultithreading.BeginsWith("T")) {
386 settings.multithreading = true;
387 } else {
388 settings.multithreading = false;
389 }
390
391 fTrainingSettings.push_back(settings);
392 }
393
394 // case inputlayout and batch layout was not given. Use default then
395 // (1, batchsize, nvariables)
396 if (fInputWidth == 0 && fInputHeight == 0 && fInputDepth == 0) {
397 fInputDepth = 1;
398 fInputHeight = 1;
400 }
401 if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
402 if (fInputHeight == 1 && fInputDepth == 1) {
403 // case of (1, batchsize, input features)
404 fBatchDepth = 1;
405 fBatchHeight = fTrainingSettings.front().batchSize;
407 }
408 else { // more general cases (e.g. for CNN)
409 fBatchDepth = fTrainingSettings.front().batchSize;
412 }
413 }
414}
415
////////////////////////////////////////////////////////////////////////////////
/// default initializations
/// NOTE(review): the signature line was a hyperlink in the HTML export and is
/// missing from this extraction.
{
   // Nothing to do here
}
422
////////////////////////////////////////////////////////////////////////////////
/// Parse the input layout string ("depth|height|width") and store the three
/// dimensions via SetInputDepth/Height/Width. Tokens beyond the third are
/// ignored; missing tokens leave the corresponding dimension at 0.
/// NOTE(review): the signature line is missing from this extraction; also the
/// TObjArray returned by Tokenize() is never deleted (leak per call).
{
   // Define the delimiter
   const TString delim("|");

   // Get the input layout string
   TString inputLayoutString = this->GetInputLayoutString();

   size_t depth = 0;
   size_t height = 0;
   size_t width = 0;

   // Split the input layout string
   TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
   TIter nextInputDim(inputDimStrings);
   TObjString *inputDimString = (TObjString *)nextInputDim();
   int idxToken = 0;

   // token position determines which dimension it sets
   for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
      switch (idxToken) {
      case 0: // input depth
      {
         TString strDepth(inputDimString->GetString());
         depth = (size_t)strDepth.Atoi();
      } break;
      case 1: // input height
      {
         TString strHeight(inputDimString->GetString());
         height = (size_t)strHeight.Atoi();
      } break;
      case 2: // input width
      {
         TString strWidth(inputDimString->GetString());
         width = (size_t)strWidth.Atoi();
      } break;
      }
      ++idxToken;
   }

   this->SetInputDepth(depth);
   this->SetInputHeight(height);
   this->SetInputWidth(width);
}
468
////////////////////////////////////////////////////////////////////////////////
/// Parse the batch layout string ("depth|height|width") and store the three
/// dimensions via SetBatchDepth/Height/Width. Mirrors ParseInputLayout.
/// NOTE(review): the signature line is missing from this extraction; the
/// TObjArray returned by Tokenize() is never deleted (leak per call).
{
   // Define the delimiter
   const TString delim("|");

   // Get the batch layout string
   TString batchLayoutString = this->GetBatchLayoutString();

   size_t batchDepth = 0;
   size_t batchHeight = 0;
   size_t batchWidth = 0;

   // Split the batch layout string
   TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
   TIter nextBatchDim(batchDimStrings);
   TObjString *batchDimString = (TObjString *)nextBatchDim();
   int idxToken = 0;

   // token position determines which dimension it sets
   for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
      switch (idxToken) {
      case 0: // batch depth
      {
         TString strDepth(batchDimString->GetString());
         batchDepth = (size_t)strDepth.Atoi();
      } break;
      case 1: // batch height
      {
         TString strHeight(batchDimString->GetString());
         batchHeight = (size_t)strHeight.Atoi();
      } break;
      case 2: // batch width
      {
         TString strWidth(batchDimString->GetString());
         batchWidth = (size_t)strWidth.Atoi();
      } break;
      }
      ++idxToken;
   }

   this->SetBatchDepth(batchDepth);
   this->SetBatchHeight(batchHeight);
   this->SetBatchWidth(batchWidth);
}
514
////////////////////////////////////////////////////////////////////////////////
/// Create a deep net based on the layout string: layers are ','-separated,
/// and each layer's fields are '|'-separated, the first field naming the
/// layer type (DENSE/CONV/MAXPOOL/RESHAPE/RNN). An unrecognized first field
/// is treated as a dense layer, for compatibility with the old DNN interface.
/// NOTE(review): the signature lines are missing from this extraction; the
/// TObjArrays returned by Tokenize() are never deleted (leak per call).
template <typename Architecture_t, typename Layer_t>
{
   // Layer specification, layer details
   const TString layerDelimiter(",");
   const TString subDelimiter("|");

   TString layoutString = this->GetLayoutString();

   //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;

   // Split layers
   TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
   TIter nextLayer(layerStrings);
   TObjString *layerString = (TObjString *)nextLayer();


   for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {

      // Split layer details
      TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
      TIter nextToken(subStrings);
      TObjString *token = (TObjString *)nextToken();

      // Determine the type of the layer
      TString strLayerType = token->GetString();


      // dispatch to the dedicated parser for each layer type
      if (strLayerType == "DENSE") {
         ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "CONV") {
         ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "MAXPOOL") {
         ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "RESHAPE") {
         ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "RNN") {
         ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter);
         // } else if (strLayerType == "LSTM") {
         //   Log() << kError << "LSTM Layer is not yet fully implemented" << Endl;
         //   //ParseLstmLayer(deepNet, nets, layerString->GetString(), subDelimiter);
         //   break;
      } else {
         // no type of layer specified - assume is dense layer as in old DNN interface
         ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      }
   }
}
566
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate dense layer.
/// The layer spec mixes a width (possibly a formula in N, the number of input
/// variables) and an activation-function name, in either order.
/// NOTE(review): the first signature line and the declaration of
/// `activationFunction` (its default value) are missing from this extraction.
template <typename Architecture_t, typename Layer_t>
                      std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                      TString delim)
{
   int width = 0;

   // this return number of input variables for the method
   // it can be used to deduce width of dense layer if specified as N+10
   // where N is the number of input variables
   const size_t inputSize = GetNvar();

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   // loop on the tokens
   // order of specifying width and activation function is not relevant
   // both 100|TANH and TANH|100 are valid cases
   for (; token != nullptr; token = (TObjString *)nextToken()) {
      idxToken++;
      // try a match with the activation function
      TString strActFnc(token->GetString());
      // if first token defines the layer type- skip it
      if (strActFnc =="DENSE") continue;

      if (strActFnc == "RELU") {
         activationFunction = DNN::EActivationFunction::kRelu;
      } else if (strActFnc == "TANH") {
         activationFunction = DNN::EActivationFunction::kTanh;
      } else if (strActFnc == "SYMMRELU") {
         activationFunction = DNN::EActivationFunction::kSymmRelu;
      } else if (strActFnc == "SOFTSIGN") {
         activationFunction = DNN::EActivationFunction::kSoftSign;
      } else if (strActFnc == "SIGMOID") {
         activationFunction = DNN::EActivationFunction::kSigmoid;
      } else if (strActFnc == "LINEAR") {
         activationFunction = DNN::EActivationFunction::kIdentity;
      } else if (strActFnc == "GAUSS") {
         activationFunction = DNN::EActivationFunction::kGauss;
      } else if (width == 0) {
         // no match found try to parse as text showing the width
         // support for input a formula where the variable 'x' is 'N' in the string
         // use TFormula for the evaluation
         TString strNumNodes = strActFnc;
         // number of nodes
         TString strN("x");
         strNumNodes.ReplaceAll("N", strN);
         strNumNodes.ReplaceAll("n", strN);
         TFormula fml("tmp", strNumNodes);
         width = fml.Eval(inputSize);
      }

   }
   // avoid zero width. assume is 1
   if (width == 0) width = 1;

   // Add the dense layer, initialize the weights and biases and copy
   TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
   denseLayer->Initialize();

   // add same layer to fNet
   if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);

   //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);

   // add the copy to all slave nets
   //for (size_t i = 0; i < nets.size(); i++) {
   //   nets[i].AddDenseLayer(copyDenseLayer);
   //}

   // check compatibility of added layer
   // for a dense layer input should be 1 x 1 x DxHxW
}
646
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate convolutional layer.
/// Token positions: 1=depth, 2/3=filter height/width, 4/5=row/col stride,
/// 6/7=zero-padding height/width, 8=activation function.
/// NOTE(review): the first signature line and the declaration of
/// `activationFunction` (its default value) are missing from this extraction.
template <typename Architecture_t, typename Layer_t>
                     std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                     TString delim)
{
   int depth = 0;
   int fltHeight = 0;
   int fltWidth = 0;
   int strideRows = 0;
   int strideCols = 0;
   int zeroPadHeight = 0;
   int zeroPadWidth = 0;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: // depth
      {
         TString strDepth(token->GetString());
         depth = strDepth.Atoi();
      } break;
      case 2: // filter height
      {
         TString strFltHeight(token->GetString());
         fltHeight = strFltHeight.Atoi();
      } break;
      case 3: // filter width
      {
         TString strFltWidth(token->GetString());
         fltWidth = strFltWidth.Atoi();
      } break;
      case 4: // stride in rows
      {
         TString strStrideRows(token->GetString());
         strideRows = strStrideRows.Atoi();
      } break;
      case 5: // stride in cols
      {
         TString strStrideCols(token->GetString());
         strideCols = strStrideCols.Atoi();
      } break;
      case 6: // zero padding height
      {
         TString strZeroPadHeight(token->GetString());
         zeroPadHeight = strZeroPadHeight.Atoi();
      } break;
      case 7: // zero padding width
      {
         TString strZeroPadWidth(token->GetString());
         zeroPadWidth = strZeroPadWidth.Atoi();
      } break;
      case 8: // activation function
      {
         TString strActFnc(token->GetString());
         if (strActFnc == "RELU") {
            activationFunction = DNN::EActivationFunction::kRelu;
         } else if (strActFnc == "TANH") {
            activationFunction = DNN::EActivationFunction::kTanh;
         } else if (strActFnc == "SYMMRELU") {
            activationFunction = DNN::EActivationFunction::kSymmRelu;
         } else if (strActFnc == "SOFTSIGN") {
            activationFunction = DNN::EActivationFunction::kSoftSign;
         } else if (strActFnc == "SIGMOID") {
            activationFunction = DNN::EActivationFunction::kSigmoid;
         } else if (strActFnc == "LINEAR") {
            activationFunction = DNN::EActivationFunction::kIdentity;
         } else if (strActFnc == "GAUSS") {
            activationFunction = DNN::EActivationFunction::kGauss;
         }
      } break;
      }
      ++idxToken;
   }

   // Add the convolutional layer, initialize the weights and biases and copy
   TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                                                                zeroPadHeight, zeroPadWidth, activationFunction);
   convLayer->Initialize();

   // Add same layer to fNet
   if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                                     zeroPadHeight, zeroPadWidth, activationFunction);

   //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);

   //// add the copy to all slave nets
   //for (size_t i = 0; i < nets.size(); i++) {
   //   nets[i].AddConvLayer(copyConvLayer);
   //}
}
745
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate max pool layer.
/// Token positions: 1/2=filter height/width, 3/4=row/col stride.
/// NOTE(review): the first signature line is missing from this extraction.
template <typename Architecture_t, typename Layer_t>
                        std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                        TString delim)
{

   int filterHeight = 0;
   int filterWidth = 0;
   int strideRows = 0;
   int strideCols = 0;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: // filter height
      {
         TString strFrmHeight(token->GetString());
         filterHeight = strFrmHeight.Atoi();
      } break;
      case 2: // filter width
      {
         TString strFrmWidth(token->GetString());
         filterWidth = strFrmWidth.Atoi();
      } break;
      case 3: // stride in rows
      {
         TString strStrideRows(token->GetString());
         strideRows = strStrideRows.Atoi();
      } break;
      case 4: // stride in cols
      {
         TString strStrideCols(token->GetString());
         strideCols = strStrideCols.Atoi();
      } break;
      }
      ++idxToken;
   }

   // Add the Max pooling layer
   // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
   deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);

   // Add the same layer to fNet
   if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);


   //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);

   //// add the copy to all slave nets
   //for (size_t i = 0; i < nets.size(); i++) {
   //   nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
   //}
}
806
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate reshape layer.
/// Token positions: 1=depth, 2=height, 3=width, 4="FLAT" to flatten; a "FLAT"
/// token at any position jumps directly to the flattening case.
/// NOTE(review): the first signature line is missing from this extraction.
template <typename Architecture_t, typename Layer_t>
                        std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                        TString delim)
{
   int depth = 0;
   int height = 0;
   int width = 0;
   bool flattening = false;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      // a FLAT token forces the flattening branch regardless of position
      if (token->GetString() == "FLAT") idxToken=4;
      switch (idxToken) {
      case 1: {
         TString strDepth(token->GetString());
         depth = strDepth.Atoi();
      } break;
      case 2: // height
      {
         TString strHeight(token->GetString());
         height = strHeight.Atoi();
      } break;
      case 3: // width
      {
         TString strWidth(token->GetString());
         width = strWidth.Atoi();
      } break;
      case 4: // flattening
      {
         TString flat(token->GetString());
         if (flat == "FLAT") {
            flattening = true;
         }
      } break;
      }
      ++idxToken;
   }

   // Add the reshape layer
   // TReshapeLayer<Architecture_t> *reshapeLayer =
   deepNet.AddReshapeLayer(depth, height, width, flattening);

   // Add the same layer to fNet
   if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);

   //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);

   //// add the copy to all slave nets
   //for (size_t i = 0; i < nets.size(); i++) {
   //   nets[i].AddReshapeLayer(copyReshapeLayer);
   //}
}
867
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate rnn layer.
/// Token positions: 1=state size, 2=input size, 3=time steps,
/// 4=remember state (1 or 0).
/// NOTE(review): the first signature line is missing from this extraction.
template <typename Architecture_t, typename Layer_t>
                    std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
                    TString delim)
{
   // int depth = 0;
   int stateSize = 0;
   int inputSize = 0;
   int timeSteps = 0;
   bool rememberState = false;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: // state size
      {
         TString strstateSize(token->GetString());
         stateSize = strstateSize.Atoi();
      } break;
      case 2: // input size
      {
         TString strinputSize(token->GetString());
         inputSize = strinputSize.Atoi();
      } break;
      case 3: // time steps
      {
         TString strtimeSteps(token->GetString());
         timeSteps = strtimeSteps.Atoi();
      }
      // NOTE(review): `break;` is missing above — case 3 falls through into
      // case 4, so the time-steps token also overwrites rememberState.
      // Likely a bug; cannot be fixed here as the signature line is missing.
      case 4: // remember state (1 or 0)
      {
         TString strrememberState(token->GetString());
         rememberState = (bool) strrememberState.Atoi();
      } break;
      }
      ++idxToken;
   }

   // Add the recurrent layer, initialize the weights and biases and copy
   TBasicRNNLayer<Architecture_t> *basicRNNLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize,
                                                                            timeSteps, rememberState);
   basicRNNLayer->Initialize();

   // Add same layer to fNet
   if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);

   //TBasicRNNLayer<Architecture_t> *copyRNNLayer = new TBasicRNNLayer<Architecture_t>(*basicRNNLayer);

   //// add the copy to all slave nets
   //for (size_t i = 0; i < nets.size(); i++) {
   //   nets[i].AddBasicRNNLayer(copyRNNLayer);
   //}
}
928
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate lstm layer.
/// Currently a skeleton: the switch has no cases, so the loop only consumes
/// tokens without creating any layer (LSTM support is not implemented yet;
/// see the commented-out dispatch in CreateDeepNet).
/// NOTE(review): the first signature line is missing from this extraction.
template <typename Architecture_t, typename Layer_t>
                     std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                     TString delim)
{
   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      }
      ++idxToken;
   }
}
948
////////////////////////////////////////////////////////////////////////////////
/// Standard constructor.
/// Value-initializes all layout/strategy members; the real configuration
/// happens later in DeclareOptions()/ProcessOptions().
MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
   : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputDepth(), fInputHeight(), fInputWidth(),
     fBatchDepth(), fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(),
     fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(),
     fWeightInitializationString(), fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()
{
   // Nothing to do here
}
959
////////////////////////////////////////////////////////////////////////////////
/// Constructor from a weight file.
/// Same member initialization as the standard constructor; the network state
/// is restored from `theWeightFile` by the base-class machinery.
MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
   : MethodBase(Types::kDL, theData, theWeightFile), fInputDepth(), fInputHeight(), fInputWidth(), fBatchDepth(),
     fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(), fInputLayoutString(),
     fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()
{
   // Nothing to do here
}
970
////////////////////////////////////////////////////////////////////////////////
/// Destructor.
/// NOTE(review): the signature line was a hyperlink in the HTML export and is
/// missing from this extraction.
{
   // Nothing to do here
}
977
978////////////////////////////////////////////////////////////////////////////////
979/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
980auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
981{
982 // remove empty spaces
983 parseString.ReplaceAll(" ","");
984 KeyValueVector_t blockKeyValues;
985 const TString keyValueDelim("=");
986
987 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
988 TIter nextBlock(blockStrings);
989 TObjString *blockString = (TObjString *)nextBlock();
990
991 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
992 blockKeyValues.push_back(std::map<TString, TString>());
993 std::map<TString, TString> &currentBlock = blockKeyValues.back();
994
995 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
996 TIter nextToken(subStrings);
997 TObjString *token = (TObjString *)nextToken();
998
999 for (; token != nullptr; token = (TObjString *)nextToken()) {
1000 TString strKeyValue(token->GetString());
1001 int delimPos = strKeyValue.First(keyValueDelim.Data());
1002 if (delimPos <= 0) continue;
1003
1004 TString strKey = TString(strKeyValue(0, delimPos));
1005 strKey.ToUpper();
1006 TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1007
1008 strKey.Strip(TString::kBoth, ' ');
1009 strValue.Strip(TString::kBoth, ' ');
1010
1011 currentBlock.insert(std::make_pair(strKey, strValue));
1012 }
1013 }
1014 return blockKeyValues;
1015}
1016
1017////////////////////////////////////////////////////////////////////////////////
1018/// What kind of analysis type can handle the CNN
1020{
1021 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1022 if (type == Types::kMulticlass) return kTRUE;
1023 if (type == Types::kRegression) return kTRUE;
1024
1025 return kFALSE;
1026}
1027
1028////////////////////////////////////////////////////////////////////////////////
1029/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1030/// 100 etc.
1031/// - 20% and 0.2 selects 20% of the training set as validation data.
1032/// - 100 selects 100 events as the validation data.
1033///
1034/// @return number of samples in validation set
1035///
1037{
1038 Int_t nValidationSamples = 0;
1039 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1040
1041 // Parsing + Validation
1042 // --------------------
1043 if (fNumValidationString.EndsWith("%")) {
1044 // Relative spec. format 20%
1045 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1046
1047 if (intValStr.IsFloat()) {
1048 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1049 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1050 } else {
1051 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1052 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1053 }
1054 } else if (fNumValidationString.IsFloat()) {
1055 Double_t valSizeAsDouble = fNumValidationString.Atof();
1056
1057 if (valSizeAsDouble < 1.0) {
1058 // Relative spec. format 0.2
1059 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1060 } else {
1061 // Absolute spec format 100 or 100.0
1062 nValidationSamples = valSizeAsDouble;
1063 }
1064 } else {
1065 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1066 << Endl;
1067 }
1068
1069 // Value validation
1070 // ----------------
1071 if (nValidationSamples < 0) {
1072 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1073 }
1074
1075 if (nValidationSamples == 0) {
1076 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1077 }
1078
1079 if (nValidationSamples >= (Int_t)trainingSetSize) {
1080 Log() << kFATAL << "Validation size \"" << fNumValidationString
1081 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1082 }
1083
1084 return nValidationSamples;
1085}
1086
1087
1088////////////////////////////////////////////////////////////////////////////////
1089/// Implementation of architecture specific train method
1090///
1091template <typename Architecture_t>
1093{
1094
1095 using Scalar_t = typename Architecture_t::Scalar_t;
1098 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1099
1100 bool debug = Log().GetMinType() == kDEBUG;
1101
1102
1103 // Determine the number of outputs
1104 // // size_t outputSize = 1;
1105 // // if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
1106 // // outputSize = GetNTargets();
1107 // // } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
1108 // // outputSize = DataInfo().GetNClasses();
1109 // // }
1110
1111 // set the random seed for weight initialization
1112 Architecture_t::SetRandomSeed(fRandomSeed);
1113
1114 ///split training data in training and validation data
1115 // and determine the number of training and testing examples
1116
1117 size_t nValidationSamples = GetNumValidationSamples();
1118 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1119
1120 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1121 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1122 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1123
1124 size_t trainingPhase = 1;
1125
1126 for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1127
1128 size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1129
1130
1131 // After the processing of the options, initialize the master deep net
1132 size_t batchSize = settings.batchSize;
1133 // Should be replaced by actual implementation. No support for this now.
1134 size_t inputDepth = this->GetInputDepth();
1135 size_t inputHeight = this->GetInputHeight();
1136 size_t inputWidth = this->GetInputWidth();
1137 size_t batchDepth = this->GetBatchDepth();
1138 size_t batchHeight = this->GetBatchHeight();
1139 size_t batchWidth = this->GetBatchWidth();
1140 ELossFunction J = this->GetLossFunction();
1142 ERegularization R = settings.regularization;
1143 EOptimizer O = settings.optimizer;
1144 Scalar_t weightDecay = settings.weightDecay;
1145
1146 //Batch size should be included in batch layout as well. There are two possibilities:
1147 // 1. Batch depth = batch size one will input tensorsa as (batch_size x d1 x d2)
1148 // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1149 // 2. Batch depth = 1, batch height = batch size batxch width = dim of input features
1150 // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1151
1152 if (batchDepth != batchSize && batchDepth > 1) {
1153 Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1154 return;
1155 }
1156 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1157 Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1158 return;
1159 }
1160
1161
1162 //check also that input layout compatible with batch layout
1163 bool badLayout = false;
1164 // case batch depth == batch size
1165 if (batchDepth == batchSize)
1166 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1167 // case batch Height is batch size
1168 if (batchHeight == batchSize && batchDepth == 1)
1169 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1170 if (badLayout) {
1171 Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1172 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1173 return;
1174 }
1175
1176 // check batch size is compatible with number of events
1177 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1178 Log() << kFATAL << "Number of samples in the datasets are train: ("
1179 << nTrainingSamples << ") test: (" << nValidationSamples
1180 << "). One of these is smaller than the batch size of "
1181 << settings.batchSize << ". Please increase the batch"
1182 << " size to be at least the same size as the smallest"
1183 << " of them." << Endl;
1184 }
1185
1186 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1187
1188 // create a copy of DeepNet for evaluating but with batch size = 1
1189 // fNet is the saved network and will be with CPU or Referrence architecture
1190 if (trainingPhase == 1) {
1191 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1192 batchHeight, batchWidth, J, I, R, weightDecay));
1193 fBuildNet = true;
1194 }
1195 else
1196 fBuildNet = false;
1197
1198 // Initialize the vector of slave nets
1199 std::vector<DeepNet_t> nets{};
1200 nets.reserve(nThreads);
1201 for (size_t i = 0; i < nThreads; i++) {
1202 // create a copies of the master deep net
1203 nets.push_back(deepNet);
1204 }
1205
1206
1207 // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1208 CreateDeepNet(deepNet, nets);
1209
1210
1211 // set droput probabilities
1212 // use convention to store in the layer 1.- dropout probabilities
1213 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1214 for (auto & p : dropoutVector) {
1215 p = 1.0 - p;
1216 }
1217 deepNet.SetDropoutProbabilities(dropoutVector);
1218
1219 if (trainingPhase > 1) {
1220 // copy initial weights from fNet to deepnet
1221 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1222 const auto & nLayer = fNet->GetLayerAt(i);
1223 const auto & dLayer = deepNet.GetLayerAt(i);
1224 // could use a traits for detecting equal architectures
1225 // dLayer->CopyWeights(nLayer->GetWeights());
1226 // dLayer->CopyBiases(nLayer->GetBiases());
1227 Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights() );
1228 Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases() );
1229 }
1230 }
1231
1232 // when fNet is built create also input matrix that will be used to evaluate it
1233 if (fBuildNet) {
1234 int n1 = batchHeight;
1235 int n2 = batchWidth;
1236 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1237 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1238 fXInput.emplace_back(MatrixImpl_t(n1,n2));
1239 // create pointer to output matrix used for the predictions
1240 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1241
1242 // print the created network
1243 Log() << "***** Deep Learning Network *****" << Endl;
1244 if (Log().GetMinType() <= kINFO)
1245 deepNet.Print();
1246 }
1247 Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1248
1249 // Loading the training and validation datasets
1250 TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1251 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, deepNet.GetBatchSize(),
1252 deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
1253 deepNet.GetOutputWidth(), nThreads);
1254
1255 TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1256 TensorDataLoader_t validationData(validationTuple, nValidationSamples, deepNet.GetBatchSize(),
1257 deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
1258 deepNet.GetOutputWidth(), nThreads);
1259
1260
1261
1262 // do an evaluation of the network to compute initial minimum test error
1263
1264 Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1265
1266 Double_t minValError = 0.0;
1267 for (auto batch : validationData) {
1268 auto inputTensor = batch.GetInput();
1269 auto outputMatrix = batch.GetOutput();
1270 auto weights = batch.GetWeights();
1271 // should we apply droput to the loss ??
1272 minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1273 }
1274 // add Regularization term
1275 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1276 minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1277 minValError += regzTerm;
1278
1279
1280 // create a pointer to base class VOptimizer
1281 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1282
1283 // initialize the base class pointer with the corresponding derived class object.
1284 switch (O) {
1285
1286 case EOptimizer::kSGD:
1287 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1288 new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1289 break;
1290
1291 case EOptimizer::kAdam:
1292 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1293 new DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1294 break;
1295
1296 case EOptimizer::kAdagrad:
1297 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1298 new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1299 break;
1300
1301 case EOptimizer::kRMSProp:
1302 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1303 new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum));
1304 break;
1305
1306 case EOptimizer::kAdadelta:
1307 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1308 new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1309 break;
1310 }
1311
1312
1313 // Initialize the vector of batches, one batch for one slave network
1314 std::vector<TTensorBatch<Architecture_t>> batches{};
1315
1316 bool converged = false;
1317 size_t convergenceCount = 0;
1318 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1319
1320 // start measuring
1321 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1322 tstart = std::chrono::system_clock::now();
1323
1324 Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1325 << " Optimizer " << settings.optimizerName
1326 << " Learning rate = " << settings.learningRate
1327 << " regularization " << (char) settings.regularization
1328 << " minimum error = " << minValError
1329 << Endl;
1330 if (!fInteractive) {
1331 std::string separator(62, '-');
1332 Log() << separator << Endl;
1333 Log() << std::setw(10) << "Epoch"
1334 << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
1335 << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
1336 << std::setw(12) << "nEvents/s"
1337 << std::setw(12) << "Conv. Steps" << Endl;
1338 Log() << separator << Endl;
1339 }
1340
1341 // set up generator for shuffling the batches
1342 // if seed is zero we have always a different order in the batches
1343 size_t shuffleSeed = 0;
1344 if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1345 RandomGenerator<TRandom3> rng(shuffleSeed);
1346
1347 // print weights before
1348 if (fBuildNet && debug) {
1349 Log() << "Initial Deep Net Weights " << Endl;
1350 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1351 for (size_t l = 0; l < weights_tensor.size(); ++l)
1352 weights_tensor[l].Print();
1353 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1354 bias_tensor[0].Print();
1355 }
1356
1357
1358 while (!converged) {
1359 optimizer->IncrementGlobalStep();
1360 trainingData.Shuffle(rng);
1361
1362 // execute all epochs
1363 //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1364
1365 for (size_t i = 0; i < batchesInEpoch; ++i ) {
1366 // Clean and load new batches, one batch for one slave net
1367 //batches.clear();
1368 //batches.reserve(nThreads);
1369 //for (size_t j = 0; j < nThreads; j++) {
1370 // batches.push_back(trainingData.GetTensorBatch());
1371 //}
1372
1373 auto my_batch = trainingData.GetTensorBatch();
1374
1375
1376 // execute one optimization step
1377 deepNet.Forward(my_batch.GetInput(), true);
1378 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1379 optimizer->Step();
1380 }
1381 //}
1382
1383 if ((optimizer->GetGlobalStep() % settings.testInterval) == 0) {
1384
1385 std::chrono::time_point<std::chrono::system_clock> t1,t2;
1386
1387 t1 = std::chrono::system_clock::now();
1388
1389 // Compute validation error.
1390 Double_t valError = 0.0;
1391 for (auto batch : validationData) {
1392 auto inputTensor = batch.GetInput();
1393 auto outputMatrix = batch.GetOutput();
1394 auto weights = batch.GetWeights();
1395 // should we apply droput to the loss ??
1396 valError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1397 }
1398 // normalize loss to number of batches and add regularization term
1399 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1400 valError /= (Double_t)(nValidationSamples / settings.batchSize);
1401 valError += regTerm;
1402
1403 t2 = std::chrono::system_clock::now();
1404
1405 // checking for convergence
1406 if (valError < minValError) {
1407 convergenceCount = 0;
1408 } else {
1409 convergenceCount += settings.testInterval;
1410 }
1411
1412 // copy configuration when reached a minimum error
1413 if (valError < minValError ) {
1414 // Copy weights from deepNet to fNet
1415 Log() << std::setw(10) << optimizer->GetGlobalStep()
1416 << " Minimum Test error found - save the configuration " << Endl;
1417 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1418 const auto & nLayer = fNet->GetLayerAt(i);
1419 const auto & dLayer = deepNet.GetLayerAt(i);
1420 ArchitectureImpl_t::CopyDiffArch(nLayer->GetWeights(), dLayer->GetWeights() );
1421 ArchitectureImpl_t::CopyDiffArch(nLayer->GetBiases(), dLayer->GetBiases() );
1422 // std::cout << "Weights for layer " << i << std::endl;
1423 // for (size_t k = 0; k < dlayer->GetWeights().size(); ++k)
1424 // dLayer->GetWeightsAt(k).Print();
1425 }
1426 minValError = valError;
1427 }
1428 else if ( minValError <= 0. )
1429 minValError = valError;
1430
1431
1432 Double_t trainingError = 0.0;
1433 // Compute training error.
1434 for (auto batch : trainingData) {
1435 auto inputTensor = batch.GetInput();
1436 auto outputMatrix = batch.GetOutput();
1437 auto weights = batch.GetWeights();
1438 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1439 }
1440 // normalize loss to number of batches and add regularization term
1441 trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1442 trainingError += regTerm;
1443
1444 // stop measuring
1445 tend = std::chrono::system_clock::now();
1446
1447 // Compute numerical throughput.
1448 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1449 std::chrono::duration<double> elapsed1 = t1-tstart;
1450 // std::chrono::duration<double> elapsed2 = t2-tstart;
1451 // time to compute training and test errors
1452 std::chrono::duration<double> elapsed_testing = tend-t1;
1453
1454 double seconds = elapsed_seconds.count();
1455 // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1456 // nGFlops *= deepnet.GetNFlops() * 1e-9;
1457 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1458
1459 converged =
1460 convergenceCount > settings.convergenceSteps || optimizer->GetGlobalStep() >= settings.maxEpochs;
1461
1462
1463 Log() << std::setw(10) << optimizer->GetGlobalStep() << " | "
1464 << std::setw(12) << trainingError
1465 << std::setw(12) << valError
1466 << std::setw(12) << seconds / settings.testInterval
1467 << std::setw(12) << elapsed_testing.count()
1468 << std::setw(12) << 1. / eventTime
1469 << std::setw(12) << convergenceCount
1470 << Endl;
1471
1472 if (converged) {
1473 Log() << Endl;
1474 }
1475 tstart = std::chrono::system_clock::now();
1476 }
1477
1478 // if (stepCount % 10 == 0 || converged) {
1479 if (converged && debug) {
1480 Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << optimizer->GetGlobalStep()
1481 << Endl;
1482 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1483 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1484 for (size_t l = 0; l < weights_tensor.size(); ++l)
1485 weights_tensor[l].Print();
1486 bias_tensor[0].Print();
1487 }
1488
1489 }
1490
1491 trainingPhase++;
1492 } // end loop on training Phase
1493}
1494
1495////////////////////////////////////////////////////////////////////////////////
1497{
1498 if (fInteractive) {
1499 Log() << kFATAL << "Not implemented yet" << Endl;
1500 return;
1501 }
1502
1503 // using for training same scalar type defined for the prediction
1504 if (this->GetArchitectureString() == "GPU") {
1505#ifdef R__HAS_TMVAGPU
1506 Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1507 TrainDeepNet<DNN::TCuda<ScalarImpl_t> >();
1508#else
1509 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1510 "you have CUDA installed and it was successfully "
1511 "detected by CMAKE."
1512 << Endl;
1513 return;
1514#endif
1515 } else if (this->GetArchitectureString() == "OPENCL") {
1516 Log() << kFATAL << "OPENCL backend not yet supported." << Endl;
1517 return;
1518 } else if (this->GetArchitectureString() == "CPU") {
1519#ifdef R__HAS_TMVACPU
1520 // note that number of threads used for BLAS might be different
1521 // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1522 Log() << kINFO << "Start of deep neural network training on CPU using (for ROOT-IMT) nthreads = "
1523 << gConfig().GetNCpu() << Endl << Endl;
1524 TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1525#else
1526 Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1527 "you have a BLAS implementation and it was successfully "
1528 "detected by CMake as well that the imt CMake flag is set."
1529 << Endl;
1530 return;
1531#endif
1532 } else if (this->GetArchitectureString() == "STANDARD") {
1533 Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
1534 TrainDeepNet<DNN::TReference<ScalarImpl_t> >();
1535 }
1536 else {
1537 Log() << kFATAL << this->GetArchitectureString() <<
1538 " is not a supported archiectire for TMVA::MethodDL"
1539 << Endl;
1540 }
1541
1542// /// definitions for CUDA
1543// #ifdef R__HAS_TMVAGPU // Included only if DNNCUDA flag is set.
1544// using Architecture_t = DNN::TCuda<Double_t>;
1545// #else
1546// #ifdef R__HAS_TMVACPU // Included only if DNNCPU flag is set.
1547// using Architecture_t = DNN::TCpu<Double_t>;
1548// #else
1549// using Architecture_t = DNN::TReference<Double_t>;
1550// #endif
1551// #endif
1552}
1553
1554
1555////////////////////////////////////////////////////////////////////////////////
1556Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1557{
1558
1559 // note that fNet should have been build with a batch size of 1
1560
1561 if (!fNet || fNet->GetDepth() == 0) {
1562 Log() << kFATAL << "The network has not been trained and fNet is not built"
1563 << Endl;
1564 }
1565
1566 // input size must be equal to 1 which is the batch size of fNet
1567 R__ASSERT(fXInput.size() == 1 && fNet->GetBatchSize() == 1);
1568
1569 // int batchWidth = fNet->GetBatchWidth();
1570 // int batchDepth = fNet->GetBatchDepth();
1571 // int batchHeight = fNet->GetBatchHeight();
1572// int noutput = fNet->GetOutputWidth();
1573
1574
1575 // get current event
1576 const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1577
1578 int n1 = fXInput[0].GetNrows();
1579 int n2 = fXInput[0].GetNcols();
1580
1581 int nVariables = GetEvent()->GetNVariables();
1582
1583
1584 if (n1*n2 != nVariables) {
1585 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1586 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1587 << Endl;
1588 }
1589 // get the event data in input matrix
1590 for (int j = 0; j < n1; ++j) {
1591 for (int k = 0; k < n2; k++) {
1592 fXInput[0](j, k) = inputValues[j*n2+k];
1593 }
1594 }
1595
1596 // perform the prediction
1597 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1598
1599 // return value
1600 double mvaValue = (*fYHat)(0, 0);
1601
1602 // for debugging
1603#ifdef DEBUG_MVAVALUE
1604 using Tensor_t = std::vector<MatrixImpl_t>;
1605 TMatrixF xInput(n1,n2, inputValues.data() );
1606 std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1607 xInput.Print();
1608 std::cout << "Output of DeepNet " << mvaValue << std::endl;
1609 auto & deepnet = *fNet;
1610 std::cout << "Loop on layers " << std::endl;
1611 for (int l = 0; l < deepnet.GetDepth(); ++l) {
1612 std::cout << "Layer " << l;
1613 const auto * layer = deepnet.GetLayerAt(l);
1614 const Tensor_t & layer_output = layer->GetOutput();
1615 layer->Print();
1616 std::cout << "DNN output " << layer_output.size() << std::endl;
1617 for (size_t i = 0; i < layer_output.size(); ++i) {
1618#ifdef R__HAS_TMVAGPU
1619 //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1620 TMatrixD m = layer_output[i];
1621#else
1622 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1623#endif
1624 m.Print();
1625 }
1626 const Tensor_t & layer_weights = layer->GetWeights();
1627 std::cout << "DNN weights " << layer_weights.size() << std::endl;
1628 if (layer_weights.size() > 0) {
1629 int i = 0;
1630#ifdef R__HAS_TMVAGPU
1631 TMatrixD m = layer_weights[i];
1632// TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1633#else
1634 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1635#endif
1636 m.Print();
1637 }
1638 }
1639#endif
1640
1641 return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1642}
1643////////////////////////////////////////////////////////////////////////////////
1644/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1645////////////////////////////////////////////////////////////////////////////////
1646template <typename Architecture_t>
1647std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1648{
1649
1650 // Check whether the model is setup
1651 if (!fNet || fNet->GetDepth() == 0) {
1652 Log() << kFATAL << "The network has not been trained and fNet is not built"
1653 << Endl;
1654 }
1655
1656 // rebuild the networks
1657
1658 size_t inputDepth = this->GetInputDepth();
1659 size_t inputHeight = this->GetInputHeight();
1660 size_t inputWidth = this->GetInputWidth();
1661 size_t batchDepth = this->GetBatchDepth();
1662 size_t batchHeight = this->GetBatchHeight();
1663 size_t batchWidth = this->GetBatchWidth();
1664 ELossFunction J = fNet->GetLossFunction();
1665 EInitialization I = fNet->GetInitialization();
1666 ERegularization R = fNet->GetRegularization();
1667 Double_t weightDecay = fNet->GetWeightDecay();
1668
1669 using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1670 using Matrix_t = typename Architecture_t::Matrix_t;
1671 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1672
1673 // create the deep neural network
1674 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1675 std::vector<DeepNet_t> nets{};
1676 fBuildNet = false;
1677 CreateDeepNet(deepNet,nets);
1678
1679 // copy weights from the saved fNet to the built DeepNet
1680 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1681 const auto & nLayer = fNet->GetLayerAt(i);
1682 const auto & dLayer = deepNet.GetLayerAt(i);
1683 Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights() );
1684 Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases() );
1685 }
1686
1687 size_t n1 = deepNet.GetBatchHeight();
1688 size_t n2 = deepNet.GetBatchWidth();
1689 size_t n0 = deepNet.GetBatchSize();
1690 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1691 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1692 n1 = deepNet.GetBatchSize();
1693 n0 = 1;
1694 }
1695
1696 Long64_t nEvents = lastEvt - firstEvt;
1697 TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1698 TensorDataLoader_t testData(testTuple, nEvents, batchSize, n0, n1, n2, deepNet.GetOutputWidth(), 1);
1699
1700
1701 // Tensor_t xInput;
1702 // for (size_t i = 0; i < n0; ++i)
1703 // xInput.emplace_back(Matrix_t(n1,n2));
1704
1705 // create pointer to output matrix used for the predictions
1706 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1707
1708 // use timer
1709 Timer timer( nEvents, GetName(), kTRUE );
1710
1711 if (logProgress)
1712 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1713 << "Evaluation of " << GetMethodName() << " on "
1714 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1715 << " sample (" << nEvents << " events)" << Endl;
1716
1717
1718 // eventg loop
1719 std::vector<double> mvaValues(nEvents);
1720
1721
1722 for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1723
1724 Long64_t ievt_end = ievt + batchSize;
1725 // case of batch prediction for
1726 if (ievt_end <= lastEvt) {
1727
1728 if (ievt == firstEvt) {
1729 Data()->SetCurrentEvent(ievt);
1730 size_t nVariables = GetEvent()->GetNVariables();
1731
1732 if (n1 == batchSize && n0 == 1) {
1733 if (n2 != nVariables) {
1734 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1735 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1736 << Endl;
1737 }
1738 } else {
1739 if (n1*n2 != nVariables || n0 != batchSize) {
1740 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1741 << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1742 << Endl;
1743 }
1744 }
1745 }
1746
1747 auto batch = testData.GetTensorBatch();
1748 auto inputTensor = batch.GetInput();
1749
1750 auto xInput = batch.GetInput();
1751 // make the prediction
1752 deepNet.Prediction(yHat, xInput, fOutputFunction);
1753 for (size_t i = 0; i < batchSize; ++i) {
1754 double value = yHat(i,0);
1755 mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1756 }
1757 }
1758 else {
1759 // case of remaining events: compute prediction by single event !
1760 for (Long64_t i = ievt; i < lastEvt; ++i) {
1761 Data()->SetCurrentEvent(i);
1762 mvaValues[i] = GetMvaValue();
1763 }
1764 }
1765 }
1766
1767 if (logProgress) {
1768 Log() << kINFO
1769 << "Elapsed time for evaluation of " << nEvents << " events: "
1770 << timer.GetElapsedTime() << " " << Endl;
1771 }
1772
1773 return mvaValues;
1774}
1775
1776const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1777{
1778 size_t nVariables = GetEvent()->GetNVariables();
1779 MatrixImpl_t X(1, nVariables);
1780 std::vector<MatrixImpl_t> X_vec;
1781 const Event *ev = GetEvent();
1782 const std::vector<Float_t>& inputValues = ev->GetValues();
1783 for (size_t i = 0; i < nVariables; i++) {
1784 X(0,i) = inputValues[i];
1785 }
1786 X_vec.emplace_back(X);
1787 size_t nTargets = std::max(1u, ev->GetNTargets());
1788 MatrixImpl_t YHat(1, nTargets);
1789 std::vector<Float_t> output(nTargets);
1790 fNet->Prediction(YHat, X_vec, fOutputFunction);
1791
1792 for (size_t i = 0; i < nTargets; i++)
1793 output[i] = YHat(0, i);
1794
1795 if (fRegressionReturnVal == NULL) {
1796 fRegressionReturnVal = new std::vector<Float_t>();
1797 }
1798 fRegressionReturnVal->clear();
1799
1800 Event * evT = new Event(*ev);
1801 for (size_t i = 0; i < nTargets; ++i) {
1802 evT->SetTarget(i, output[i]);
1803 }
1804
1805 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1806 for (size_t i = 0; i < nTargets; ++i) {
1807 fRegressionReturnVal->push_back(evT2->GetTarget(i));
1808 }
1809 delete evT;
1810 return *fRegressionReturnVal;
1811}
1812
1813const std::vector<Float_t> & TMVA::MethodDL::GetMulticlassValues()
1814{
1815 size_t nVariables = GetEvent()->GetNVariables();
1816 MatrixImpl_t X(1, nVariables);
1817 std::vector<MatrixImpl_t> X_vec;
1818 MatrixImpl_t YHat(1, DataInfo().GetNClasses());
1819 if (fMulticlassReturnVal == NULL) {
1820 fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1821 }
1822
1823 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1824 for (size_t i = 0; i < nVariables; i++) {
1825 X(0,i) = inputValues[i];
1826 }
1827 X_vec.emplace_back(X);
1828 fNet->Prediction(YHat, X_vec, fOutputFunction);
1829 for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1830 (*fMulticlassReturnVal)[i] = YHat(0, i);
1831 }
1832 return *fMulticlassReturnVal;
1833}
1834
1835
1836////////////////////////////////////////////////////////////////////////////////
1837/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1838////////////////////////////////////////////////////////////////////////////////
1839std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
1840{
1841
1842
1843 Long64_t nEvents = Data()->GetNEvents();
1844 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
1845 if (firstEvt < 0) firstEvt = 0;
1846 nEvents = lastEvt-firstEvt;
1847
1848 // use same batch size as for training (from first strategy)
1849 size_t batchSize = (fTrainingSettings.empty()) ? 1000 : fTrainingSettings.front().batchSize;
1850 if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
1851
1852 // using for training same scalar type defined for the prediction
1853 if (this->GetArchitectureString() == "GPU") {
1854#ifdef R__HAS_TMVAGPU
1855 Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
1856 return PredictDeepNet<DNN::TCuda<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
1857#endif
1858 } else if (this->GetArchitectureString() == "CPU") {
1859#ifdef R__HAS_TMVACPU
1860 Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
1861 return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
1862#endif
1863 }
1864 Log() << kINFO << "Evaluate deep neural network on the STANDARD architecture using batches with size = " << batchSize
1865 << Endl << Endl;
1866 return PredictDeepNet<DNN::TReference<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
1867}
1868////////////////////////////////////////////////////////////////////////////////
1869void MethodDL::AddWeightsXMLTo(void * parent) const
1870{
1871 // Create the parent XML node with name "Weights"
1872 auto & xmlEngine = gTools().xmlengine();
1873 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
1874
1875 /*! Get all necessary information, in order to be able to reconstruct the net
1876 * if we read the same XML file. */
1877
1878 // Deep Net specific info
1879 Int_t depth = fNet->GetDepth();
1880
1881 Int_t inputDepth = fNet->GetInputDepth();
1882 Int_t inputHeight = fNet->GetInputHeight();
1883 Int_t inputWidth = fNet->GetInputWidth();
1884
1885 Int_t batchSize = fNet->GetBatchSize();
1886
1887 Int_t batchDepth = fNet->GetBatchDepth();
1888 Int_t batchHeight = fNet->GetBatchHeight();
1889 Int_t batchWidth = fNet->GetBatchWidth();
1890
1891 char lossFunction = static_cast<char>(fNet->GetLossFunction());
1892 char initialization = static_cast<char>(fNet->GetInitialization());
1893 char regularization = static_cast<char>(fNet->GetRegularization());
1894
1895 Double_t weightDecay = fNet->GetWeightDecay();
1896
1897 // Method specific info (not sure these are needed)
1898 char outputFunction = static_cast<char>(this->GetOutputFunction());
1899 //char lossFunction = static_cast<char>(this->GetLossFunction());
1900
1901 // Add attributes to the parent node
1902 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
1903
1904 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
1905 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
1906 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
1907
1908 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
1909 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
1910 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
1911 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
1912
1913 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1914 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
1915 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
1916 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
1917
1918 gTools().AddAttr(nn, "WeightDecay", weightDecay);
1919
1920
1921 for (Int_t i = 0; i < depth; i++)
1922 {
1923 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
1924 }
1925
1926
1927}
1928
1929////////////////////////////////////////////////////////////////////////////////
1931{
1932
1933 auto netXML = gTools().GetChild(rootXML, "Weights");
1934 if (!netXML){
1935 netXML = rootXML;
1936 }
1937
1938 size_t netDepth;
1939 gTools().ReadAttr(netXML, "NetDepth", netDepth);
1940
1941 size_t inputDepth, inputHeight, inputWidth;
1942 gTools().ReadAttr(netXML, "InputDepth", inputDepth);
1943 gTools().ReadAttr(netXML, "InputHeight", inputHeight);
1944 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1945
1946 size_t batchSize, batchDepth, batchHeight, batchWidth;
1947 gTools().ReadAttr(netXML, "BatchSize", batchSize);
1948 // use always batchsize = 1
1949 //batchSize = 1;
1950 gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
1951 gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
1952 gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
1953
1954 char lossFunctionChar;
1955 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1956 char initializationChar;
1957 gTools().ReadAttr(netXML, "Initialization", initializationChar);
1958 char regularizationChar;
1959 gTools().ReadAttr(netXML, "Regularization", regularizationChar);
1960 char outputFunctionChar;
1961 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1962 double weightDecay;
1963 gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
1964
1965 // create the net
1966
1967 // DeepNetCpu_t is defined in MethodDL.h
1968 this->SetInputDepth(inputDepth);
1969 this->SetInputHeight(inputHeight);
1970 this->SetInputWidth(inputWidth);
1971 this->SetBatchDepth(batchDepth);
1972 this->SetBatchHeight(batchHeight);
1973 this->SetBatchWidth(batchWidth);
1974
1975
1976
1977 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
1978 batchHeight, batchWidth,
1979 static_cast<ELossFunction>(lossFunctionChar),
1980 static_cast<EInitialization>(initializationChar),
1981 static_cast<ERegularization>(regularizationChar),
1982 weightDecay));
1983
1984 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1985
1986
1987 //size_t previousWidth = inputWidth;
1988 auto layerXML = gTools().xmlengine().GetChild(netXML);
1989
1990 // loop on the layer and add them to the network
1991 for (size_t i = 0; i < netDepth; i++) {
1992
1993 TString layerName = gTools().xmlengine().GetNodeName(layerXML);
1994
1995 // case of dense layer
1996 if (layerName == "DenseLayer") {
1997
1998 // read width and activation function and then we can create the layer
1999 size_t width = 0;
2000 gTools().ReadAttr(layerXML, "Width", width);
2001
2002 // Read activation function.
2003 TString funcString;
2004 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2005 EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
2006
2007
2008 fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
2009
2010 }
2011 // Convolutional Layer
2012 else if (layerName == "ConvLayer") {
2013
2014 // read width and activation function and then we can create the layer
2015 size_t depth = 0;
2016 gTools().ReadAttr(layerXML, "Depth", depth);
2017 size_t fltHeight, fltWidth = 0;
2018 size_t strideRows, strideCols = 0;
2019 size_t padHeight, padWidth = 0;
2020 gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
2021 gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
2022 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2023 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2024 gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
2025 gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
2026
2027 // Read activation function.
2028 TString funcString;
2029 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2030 EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
2031
2032
2033 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2034 padHeight, padWidth, actFunction);
2035
2036 }
2037
2038 // MaxPool Layer
2039 else if (layerName == "MaxPoolLayer") {
2040
2041 // read maxpool layer info
2042 size_t filterHeight, filterWidth = 0;
2043 size_t strideRows, strideCols = 0;
2044 gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
2045 gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2046 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2047 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2048
2049 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2050 }
2051 else if (layerName == "ReshapeLayer") {
2052
2053 // read reshape layer info
2054 size_t depth, height, width = 0;
2055 gTools().ReadAttr(layerXML, "Depth", depth);
2056 gTools().ReadAttr(layerXML, "Height", height);
2057 gTools().ReadAttr(layerXML, "Width", width);
2058 int flattening = 0;
2059 gTools().ReadAttr(layerXML, "Flattening",flattening );
2060
2061 fNet->AddReshapeLayer(depth, height, width, flattening);
2062
2063 }
2064 else if (layerName == "RNNLayer") {
2065
2066 // read RNN layer info
2067 size_t stateSize,inputSize, timeSteps = 0;
2068 int rememberState= 0;
2069 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2070 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2071 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2072 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2073
2074 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
2075
2076 }
2077
2078
2079 // read eventually weights and biases
2080 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2081
2082 // read next layer
2083 layerXML = gTools().GetNextChild(layerXML);
2084 }
2085
2086 fBuildNet = false;
2087 // create now the input and output matrices
2088 int n1 = batchHeight;
2089 int n2 = batchWidth;
2090 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2091 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
2092 if (fXInput.size() > 0) fXInput.clear();
2093 fXInput.emplace_back(MatrixImpl_t(n1,n2));
2094 // create pointer to output matrix used for the predictions
2095 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2096
2097
2098}
2099
2100
2101////////////////////////////////////////////////////////////////////////////////
2102void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
2103{
2104}
2105
2106////////////////////////////////////////////////////////////////////////////////
2108{
2109 // TODO
2110 return NULL;
2111}
2112
2113////////////////////////////////////////////////////////////////////////////////
2115{
2116 // TODO
2117}
2118
2119} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
long long Long64_t
Definition: RtypesCore.h:69
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:96
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
The Formula class.
Definition: TFormula.h:84
Double_t Eval(Double_t x) const
Sets first variable (e.g. x) and evaluate formula.
Definition: TFormula.cxx:3318
UInt_t GetNCpu()
Definition: Config.h:72
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Definition: Configurable.h:168
MsgLogger & Log() const
Definition: Configurable.h:122
Adadelta Optimizer class.
Definition: Adadelta.h:44
Adagrad Optimizer class.
Definition: Adagrad.h:44
Adam Optimizer class.
Definition: Adam.h:44
Generic Deep Neural Network class.
Definition: DeepNet.h:74
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:618
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:449
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:403
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:488
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:651
Generic layer class.
Definition: DenseLayer.h:56
RMSProp Optimizer class.
Definition: RMSProp.h:44
static void CopyDiffArch(TMatrixT< Scalar_t > &A, const AMatrix_t &B)
Definition: Reference.h:569
Stochastic Batch Gradient Descent Optimizer class.
Definition: SGD.h:45
Generic General Layer class.
Definition: GeneralLayer.h:46
void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:376
Class that contains all the data information.
Definition: DataSetInfo.h:60
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:205
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:99
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:309
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:320
UInt_t GetClass() const
Definition: Event.h:87
std::vector< Float_t > & GetValues()
Definition: Event.h:95
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:103
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
const char * GetName() const
Definition: MethodBase.h:325
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:675
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
const TString & GetMethodName() const
Definition: MethodBase.h:322
const Event * GetEvent() const
Definition: MethodBase.h:740
DataSetInfo & DataInfo() const
Definition: MethodBase.h:401
UInt_t GetNVariables() const
Definition: MethodBase.h:336
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:584
UInt_t GetNvar() const
Definition: MethodBase.h:335
DataSet * Data() const
Definition: MethodBase.h:400
IPythonInteractive * fInteractive
Definition: MethodBase.h:437
void SetInputWidth(size_t inputWidth)
Definition: MethodDL.h:260
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition: MethodDL.h:164
void GetHelpMessage() const
Definition: MethodDL.cxx:2114
DNN::ELossFunction fLossFunction
The loss function.
Definition: MethodDL.h:171
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDL.cxx:1813
size_t fInputHeight
The height of the input.
Definition: MethodDL.h:160
TString fLayoutString
The string defining the layout of the deep net.
Definition: MethodDL.h:175
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:98
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1496
size_t GetBatchHeight() const
Definition: MethodDL.h:235
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
Definition: MethodDL.cxx:1839
void ParseRnnLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate rnn layer.
Definition: MethodDL.cxx:871
TString fWeightInitializationString
The string defining the weight initialization method.
Definition: MethodDL.h:178
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:749
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:167
TString fArchitectureString
The string defining the architecure: CPU or GPU.
Definition: MethodDL.h:179
void Init()
default initializations
Definition: MethodDL.cxx:418
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:951
void TrainDeepNet()
train of deep neural network using the defined architecture
Definition: MethodDL.cxx:1092
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:252
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:241
void ParseLstmLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate lstm layer.
Definition: MethodDL.cxx:932
void SetInputDepth(size_t inputDepth)
Setters.
Definition: MethodDL.h:258
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:570
UInt_t GetNumValidationSamples()
parce the validation string and return the number of event data used for validation
TString GetBatchLayoutString() const
Definition: MethodDL.h:245
void ProcessOptions()
Definition: MethodDL.cxx:221
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition: MethodDL.h:165
size_t GetInputDepth() const
Definition: MethodDL.h:230
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDL.cxx:1776
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:99
TString GetInputLayoutString() const
Definition: MethodDL.h:244
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:263
std::vector< MatrixImpl_t > fXInput
Definition: MethodDL.h:97
size_t GetInputHeight() const
Definition: MethodDL.h:231
TString GetArchitectureString() const
Definition: MethodDL.h:250
void ParseBatchLayout()
Parse the input layout.
Definition: MethodDL.cxx:471
void ReadWeightsFromStream(std::istream &)
Definition: MethodDL.cxx:2102
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:1930
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition: MethodDL.h:180
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:82
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:170
DNN::EInitialization fWeightInitialization
The initialization method.
Definition: MethodDL.h:169
size_t GetBatchDepth() const
Definition: MethodDL.h:234
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:185
size_t GetInputWidth() const
Definition: MethodDL.h:232
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:242
TString fBatchLayoutString
The string defining the layout of the batch.
Definition: MethodDL.h:174
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:1019
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:650
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:810
TString fTrainingStrategyString
The string defining the training strategy.
Definition: MethodDL.h:177
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2107
void SetInputHeight(size_t inputHeight)
Definition: MethodDL.h:259
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:262
size_t fInputWidth
The width of the input.
Definition: MethodDL.h:161
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:980
size_t fInputDepth
The depth of the input.
Definition: MethodDL.h:159
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:264
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1647
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:240
TString GetLayoutString() const
Definition: MethodDL.h:246
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition: MethodDL.h:163
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition: MethodDL.h:93
size_t GetBatchWidth() const
Definition: MethodDL.h:236
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:1869
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:94
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDL.cxx:1556
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:973
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:425
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:182
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling the ProcesOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:518
TString fErrorStrategy
The string defining the error strategy for training.
Definition: MethodDL.h:176
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:161
TString fInputLayoutString
The string defining the layout of the input.
Definition: MethodDL.h:173
EMsgType GetMinType() const
Definition: MsgLogger.h:71
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:134
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TXMLEngine & xmlengine()
Definition: Tools.h:270
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:337
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:355
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1235
Singleton class for Global types used by TMVA.
Definition: Types.h:73
EAnalysisType
Definition: Types.h:127
@ kMulticlass
Definition: Types.h:130
@ kClassification
Definition: Types.h:128
@ kRegression
Definition: Types.h:129
@ kTraining
Definition: Types.h:144
void Print(Option_t *name="") const
Print the matrix as a table of elements.
TMatrixT.
Definition: TMatrixT.h:39
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Definition: TNamed.cxx:128
An array of TObjects.
Definition: TObjArray.h:37
Collectable string class.
Definition: TObjString.h:28
const TString & GetString() const
Definition: TObjString.h:46
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:550
Basic string class.
Definition: TString.h:131
Ssiz_t Length() const
Definition: TString.h:405
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1921
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1106
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:499
const char * Data() const
Definition: TString.h:364
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
@ kTrailing
Definition: TString.h:262
@ kBoth
Definition: TString.h:262
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1138
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2197
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
Definition: TXMLEngine.cxx:709
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
#define I(x, y, z)
double T(double x)
Definition: ChebyshevPol.h:34
EInitialization
Definition: Functions.h:70
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:80
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:44
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:216
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:63
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:55
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDL.cxx:69
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Bool_t IsNaN(Double_t x)
Definition: TMath.h:880
Double_t Log(Double_t x)
Definition: TMath.h:748
All of the options that can be specified in the training string.
Definition: MethodDL.h:62
DNN::EOptimizer optimizer
Definition: MethodDL.h:68
DNN::ERegularization regularization
Definition: MethodDL.h:67
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:73
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
auto * t1
Definition: textangle.C:20
static void output(int code)
Definition: gifencode.c:226