MethodDNN.cxx
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDNN *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A neural network implementation *
12  * *
13  * Authors (alphabetical): *
14  * Simon Pfreundschuh <s.pfreundschuh@gmail.com> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@gmx.ch> - CERN, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 /*! \class TMVA::MethodDNN
29 \ingroup TMVA
30 Deep Neural Network Implementation.
31 */
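// Illustrative usage sketch (not part of this source file): how MethodDNN is
// typically booked from user code via the TMVA::Factory. The option values are
// made-up examples, and `dataloader` and `outputFile` are assumed to be a
// previously created TMVA::DataLoader* and TFile*.
//
//    TMVA::Factory factory("TMVAClassification", outputFile,
//                          "AnalysisType=Classification");
//    factory.BookMethod(dataloader, TMVA::Types::kDNN, "DNN",
//                       "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR:"
//                       "ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:"
//                       "Architecture=CPU:ValidationSize=20%:"
//                       "TrainingStrategy=LearningRate=1e-1,BatchSize=30,ConvergenceSteps=50");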
32 
33 #include "TMVA/MethodDNN.h"
34 
35 #include "TString.h"
36 #include "TFormula.h"
37 #include "TObjString.h"
38 
39 #include "TMVA/ClassifierFactory.h"
40 #include "TMVA/Configurable.h"
41 #include "TMVA/IMethod.h"
42 #include "TMVA/MsgLogger.h"
43 #include "TMVA/MethodBase.h"
44 #include "TMVA/Timer.h"
45 #include "TMVA/Types.h"
46 #include "TMVA/Tools.h"
47 #include "TMVA/Config.h"
48 #include "TMVA/Ranking.h"
49 
50 #include "TMVA/DNN/Net.h"
52 
53 #include "TMVA/NeuralNet.h"
54 #include "TMVA/Monitoring.h"
55 
56 #ifdef R__HAS_TMVACPU
57 #include "TMVA/DNN/Architectures/Cpu.h"
58 #endif
59 #ifdef R__HAS_TMVAGPU
60 #include "TMVA/DNN/Architectures/Cuda.h"
61 #endif
62 
63 #include <algorithm>
64 #include <iostream>
65 #include <string>
66 #include <iomanip>
67 
68 REGISTER_METHOD(DNN)
69 
70 ClassImp(TMVA::MethodDNN);
71 
72 namespace TMVA
73 {
74  using namespace DNN;
75 
76  ////////////////////////////////////////////////////////////////////////////////
77  /// standard constructor
78 
79  TMVA::MethodDNN::MethodDNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData,
80  const TString &theOption)
81  : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption), fWeightInitialization(), fOutputFunction(),
82  fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
83  fArchitectureString(), fTrainingSettings(), fResume(false), fSettings()
84  {
85 }
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// constructor from a weight file
89 
90 TMVA::MethodDNN::MethodDNN(DataSetInfo& theData,
91  const TString& theWeightFile)
92  : MethodBase( Types::kDNN, theData, theWeightFile),
93  fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
94  fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
95  fTrainingSettings(), fResume(false), fSettings()
96 {
99 }
100 
101 ////////////////////////////////////////////////////////////////////////////////
102 /// destructor
103 
104 TMVA::MethodDNN::~MethodDNN()
105 {
106  fWeightInitialization = DNN::EInitialization::kGauss;
107  fOutputFunction = DNN::EOutputFunction::kSigmoid;
108 }
109 
110 ////////////////////////////////////////////////////////////////////////////////
111 /// The DNN can handle two-class classification, multiclass classification,
112 /// and regression
113 
114 Bool_t TMVA::MethodDNN::HasAnalysisType(Types::EAnalysisType type,
115  UInt_t numberClasses,
116  UInt_t /*numberTargets*/ )
117 {
118  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
119  if (type == Types::kMulticlass ) return kTRUE;
120  if (type == Types::kRegression ) return kTRUE;
121 
122  return kFALSE;
123 }
124 
125 ////////////////////////////////////////////////////////////////////////////////
126 /// default initializations
127 
128 void TMVA::MethodDNN::Init() {
129  Log() << kWARNING
130  << "MethodDNN is deprecated and will be removed in a future ROOT version. "
131  "Please use MethodDL (TMVA::kDL) instead."
132  << Endl;
133 
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 /// Options to be set in the option string:
138 ///
139 /// - LearningRate <float> DNN learning rate parameter.
140 /// - DecayRate <float> Decay rate for learning parameter.
141 /// - TestRate <int> Period of validation set error computation.
142 /// - BatchSize <int> Number of events per batch.
143 ///
144 /// - ValidationSize <string> How many events to use for validation. "0.2"
145 /// or "20%" indicates that a fifth of the
146 /// training data should be used. "100"
147 /// indicates that 100 events should be used.
148 
149 void TMVA::MethodDNN::DeclareOptions()
150 {
151 
152  DeclareOptionRef(fLayoutString="SOFTSIGN|(N+100)*2,LINEAR",
153  "Layout",
154  "Layout of the network.");
155 
156  DeclareOptionRef(fValidationSize = "20%", "ValidationSize",
157  "Part of the training data to use for "
158  "validation. Specify as 0.2 or 20% to use a "
159  "fifth of the data set as validation set. "
160  "Specify as 100 to use exactly 100 events. "
161  "(Default: 20%)");
162 
163  DeclareOptionRef(fErrorStrategy="CROSSENTROPY",
164  "ErrorStrategy",
165  "Loss function: Mean squared error (regression)"
166  " or cross entropy (binary classification).");
167  AddPreDefVal(TString("CROSSENTROPY"));
168  AddPreDefVal(TString("SUMOFSQUARES"));
169  AddPreDefVal(TString("MUTUALEXCLUSIVE"));
170 
171  DeclareOptionRef(fWeightInitializationString="XAVIER",
172  "WeightInitialization",
173  "Weight initialization strategy");
174  AddPreDefVal(TString("XAVIER"));
175  AddPreDefVal(TString("XAVIERUNIFORM"));
176 
177  DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
178  AddPreDefVal(TString("STANDARD"));
179  AddPreDefVal(TString("CPU"));
180  AddPreDefVal(TString("GPU"));
181  AddPreDefVal(TString("OPENCL"));
182 
183  DeclareOptionRef(
184  fTrainingStrategyString = "LearningRate=1e-1,"
185  "Momentum=0.3,"
186  "Repetitions=3,"
187  "ConvergenceSteps=50,"
188  "BatchSize=30,"
189  "TestRepetitions=7,"
190  "WeightDecay=0.0,"
191  "Renormalize=L2,"
192  "DropConfig=0.0,"
193  "DropRepetitions=5|LearningRate=1e-4,"
194  "Momentum=0.3,"
195  "Repetitions=3,"
196  "ConvergenceSteps=50,"
197  "BatchSize=20,"
198  "TestRepetitions=7,"
199  "WeightDecay=0.001,"
200  "Renormalize=L2,"
201  "DropConfig=0.0+0.5+0.5,"
202  "DropRepetitions=5,"
203  "Multithreading=True",
204  "TrainingStrategy",
205  "Defines the training strategies.");
206 }
207 
208 ////////////////////////////////////////////////////////////////////////////////
209 /// parse layout specification string and return a vector, each entry
210 /// containing the number of neurons to go in each successive layer
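///
/// As an illustration (this exact call does not appear in this file), assuming
/// 10 input variables so that N = 10:
/// \code
/// LayoutVector_t layout = ParseLayoutString("TANH|(N+5)*2,TANH|N,LINEAR");
/// // yields { {30, kTanh}, {10, kTanh}, {0, kIdentity} }; the width and
/// // activation of the last entry are superseded by the output layer that
/// // ProcessOptions() adds later.
/// \endcode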
211 
212 auto TMVA::MethodDNN::ParseLayoutString(TString layoutString)
213  -> LayoutVector_t
214 {
215  LayoutVector_t layout;
216  const TString layerDelimiter(",");
217  const TString subDelimiter("|");
218 
219  const size_t inputSize = GetNvar();
220 
221  TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
222  TIter nextLayer (layerStrings);
223  TObjString* layerString = (TObjString*)nextLayer ();
224 
225  for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
226  int numNodes = 0;
227  EActivationFunction activationFunction = EActivationFunction::kTanh;
228 
229  TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
230  TIter nextToken (subStrings);
231  TObjString* token = (TObjString *) nextToken();
232  int idxToken = 0;
233  for (; token != nullptr; token = (TObjString *) nextToken()) {
234  switch (idxToken)
235  {
236  case 0:
237  {
238  TString strActFnc (token->GetString ());
239  if (strActFnc == "RELU") {
240  activationFunction = DNN::EActivationFunction::kRelu;
241  } else if (strActFnc == "TANH") {
242  activationFunction = DNN::EActivationFunction::kTanh;
243  } else if (strActFnc == "SYMMRELU") {
244  activationFunction = DNN::EActivationFunction::kSymmRelu;
245  } else if (strActFnc == "SOFTSIGN") {
246  activationFunction = DNN::EActivationFunction::kSoftSign;
247  } else if (strActFnc == "SIGMOID") {
248  activationFunction = DNN::EActivationFunction::kSigmoid;
249  } else if (strActFnc == "LINEAR") {
250  activationFunction = DNN::EActivationFunction::kIdentity;
251  } else if (strActFnc == "GAUSS") {
252  activationFunction = DNN::EActivationFunction::kGauss;
253  }
254  }
255  break;
256  case 1: // number of nodes
257  {
258  TString strNumNodes (token->GetString ());
259  TString strN ("x");
260  strNumNodes.ReplaceAll ("N", strN);
261  strNumNodes.ReplaceAll ("n", strN);
262  TFormula fml ("tmp",strNumNodes);
263  numNodes = fml.Eval (inputSize);
264  }
265  break;
266  }
267  ++idxToken;
268  }
269  layout.push_back(std::make_pair(numNodes, activationFunction));
270  }
271  return layout;
272 }
273 
274 ////////////////////////////////////////////////////////////////////////////////
275 /// parse key value pairs in blocks -> return vector of blocks with map of key value pairs
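///
/// As an illustration (not a call made in this file), parsing
/// "LearningRate=1e-1,BatchSize=30|LearningRate=1e-4,BatchSize=20" with
/// blockDelim "|" and tokenDelim "," yields two maps,
/// { "LEARNINGRATE" -> "1e-1", "BATCHSIZE" -> "30" } and
/// { "LEARNINGRATE" -> "1e-4", "BATCHSIZE" -> "20" }; keys are upper-cased
/// while values are kept as written.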
276 
277 auto TMVA::MethodDNN::ParseKeyValueString(TString parseString,
278  TString blockDelim,
279  TString tokenDelim)
280  -> KeyValueVector_t
281 {
282  KeyValueVector_t blockKeyValues;
283  const TString keyValueDelim ("=");
284 
285  TObjArray* blockStrings = parseString.Tokenize (blockDelim);
286  TIter nextBlock (blockStrings);
287  TObjString* blockString = (TObjString *) nextBlock();
288 
289  for (; blockString != nullptr; blockString = (TObjString *) nextBlock())
290  {
291  blockKeyValues.push_back (std::map<TString,TString>());
292  std::map<TString,TString>& currentBlock = blockKeyValues.back ();
293 
294  TObjArray* subStrings = blockString->GetString ().Tokenize (tokenDelim);
295  TIter nextToken (subStrings);
296  TObjString* token = (TObjString*)nextToken ();
297 
298  for (; token != nullptr; token = (TObjString *)nextToken())
299  {
300  TString strKeyValue (token->GetString ());
301  int delimPos = strKeyValue.First (keyValueDelim.Data ());
302  if (delimPos <= 0)
303  continue;
304 
305  TString strKey = TString (strKeyValue (0, delimPos));
306  strKey.ToUpper();
307  TString strValue = TString (strKeyValue (delimPos+1, strKeyValue.Length ()));
308 
309  strKey = strKey.Strip (TString::kBoth, ' ');
310  strValue = strValue.Strip (TString::kBoth, ' ');
311 
312  currentBlock.insert (std::make_pair (strKey, strValue));
313  }
314  }
315  return blockKeyValues;
316 }
317 
318 ////////////////////////////////////////////////////////////////////////////////
319 
320 TString fetchValue (const std::map<TString, TString>& keyValueMap, TString key)
321 {
322  key.ToUpper ();
323  std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
324  if (it == keyValueMap.end()) {
325  return TString ("");
326  }
327  return it->second;
328 }
329 
330 ////////////////////////////////////////////////////////////////////////////////
331 
332 template <typename T>
333 T fetchValue(const std::map<TString,TString>& keyValueMap,
334  TString key,
335  T defaultValue);
336 
337 ////////////////////////////////////////////////////////////////////////////////
338 
339 template <>
340 int fetchValue(const std::map<TString,TString>& keyValueMap,
341  TString key,
342  int defaultValue)
343 {
344  TString value (fetchValue (keyValueMap, key));
345  if (value == "") {
346  return defaultValue;
347  }
348  return value.Atoi ();
349 }
350 
351 ////////////////////////////////////////////////////////////////////////////////
352 
353 template <>
354 double fetchValue (const std::map<TString,TString>& keyValueMap,
355  TString key, double defaultValue)
356 {
357  TString value (fetchValue (keyValueMap, key));
358  if (value == "") {
359  return defaultValue;
360  }
361  return value.Atof ();
362 }
363 
364 ////////////////////////////////////////////////////////////////////////////////
365 
366 template <>
367 TString fetchValue (const std::map<TString,TString>& keyValueMap,
368  TString key, TString defaultValue)
369 {
370  TString value (fetchValue (keyValueMap, key));
371  if (value == "") {
372  return defaultValue;
373  }
374  return value;
375 }
376 
377 ////////////////////////////////////////////////////////////////////////////////
378 
379 template <>
380 bool fetchValue (const std::map<TString,TString>& keyValueMap,
381  TString key, bool defaultValue)
382 {
383  TString value (fetchValue (keyValueMap, key));
384  if (value == "") {
385  return defaultValue;
386  }
387  value.ToUpper ();
388  if (value == "TRUE" || value == "T" || value == "1") {
389  return true;
390  }
391  return false;
392 }
393 
394 ////////////////////////////////////////////////////////////////////////////////
395 
396 template <>
397 std::vector<double> fetchValue(const std::map<TString, TString> & keyValueMap,
398  TString key,
399  std::vector<double> defaultValue)
400 {
401  TString parseString (fetchValue (keyValueMap, key));
402  if (parseString == "") {
403  return defaultValue;
404  }
405  parseString.ToUpper ();
406  std::vector<double> values;
407 
408  const TString tokenDelim ("+");
409  TObjArray* tokenStrings = parseString.Tokenize (tokenDelim);
410  TIter nextToken (tokenStrings);
411  TObjString* tokenString = (TObjString*)nextToken ();
412  for (; tokenString != NULL; tokenString = (TObjString*)nextToken ()) {
413  std::stringstream sstr;
414  double currentValue;
415  sstr << tokenString->GetString ().Data ();
416  sstr >> currentValue;
417  values.push_back (currentValue);
418  }
419  return values;
420 }
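// For illustration (not a call made at this point in the file): applied to a
// block containing "DropConfig=0.0+0.5+0.5", the specialization above returns
// the vector {0.0, 0.5, 0.5}, since the value string is split on "+".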
421 
422 ////////////////////////////////////////////////////////////////////////////////
423 
424 void TMVA::MethodDNN::ProcessOptions()
425 {
426  if (IgnoreEventsWithNegWeightsInTraining()) {
427  Log() << kINFO
428  << "Will ignore events with negative weights in training!"
429  << Endl;
430  }
431 
432  if (fArchitectureString == "STANDARD") {
433  Log() << kERROR << "The STANDARD architecture has been deprecated. "
434  "Please use Architecture=CPU or Architecture=GPU. "
435  "See the TMVA Users' Guide for instructions if you "
436  "encounter problems."
437  << Endl;
438  Log() << kFATAL << "The STANDARD architecture has been deprecated. "
439  "Please use Architecture=CPU or Architecture=GPU. "
440  "See the TMVA Users' Guide for instructions if you "
441  "encounter problems."
442  << Endl;
443  }
444 
445  if (fArchitectureString == "OPENCL") {
446  Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
447  "Please use Architecture=CPU or Architecture=GPU for the "
448  "time being. See the TMVA Users' Guide for instructions "
449  "if you encounter problems."
450  << Endl;
451  Log() << kFATAL << "The OPENCL architecture has not been implemented yet. "
452  "Please use Architecture=CPU or Architecture=GPU for the "
453  "time being. See the TMVA Users' Guide for instructions "
454  "if you encounter problems."
455  << Endl;
456  }
457 
458  if (fArchitectureString == "GPU") {
459 #ifndef DNNCUDA // Included only if DNNCUDA flag is _not_ set.
460  Log() << kERROR << "CUDA backend not enabled. Please make sure "
461  "you have CUDA installed and that it was successfully "
462  "detected by CMake."
463  << Endl;
464  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
465  "you have CUDA installed and that it was successfully "
466  "detected by CMake."
467  << Endl;
468 #endif // DNNCUDA
469  }
470 
471  if (fArchitectureString == "CPU") {
472 #ifndef DNNCPU // Included only if DNNCPU flag is _not_ set.
473  Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
474  "you have a BLAS implementation and that it was successfully "
475  "detected by CMake, and that the imt CMake flag is set."
476  << Endl;
477  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
478  "you have a BLAS implementation and that it was successfully "
479  "detected by CMake, and that the imt CMake flag is set."
480  << Endl;
481 #endif // DNNCPU
482  }
483 
484  //
485  // Set network structure.
486  //
487 
488  fLayout = TMVA::MethodDNN::ParseLayoutString (fLayoutString);
489  size_t inputSize = GetNVariables ();
490  size_t outputSize = 1;
491  if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
492  outputSize = GetNTargets();
493  } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
494  outputSize = DataInfo().GetNClasses();
495  }
496 
497  fNet.SetBatchSize(1);
498  fNet.SetInputWidth(inputSize);
499 
500  auto itLayout = std::begin (fLayout);
501  auto itLayoutEnd = std::end (fLayout)-1;
502  for ( ; itLayout != itLayoutEnd; ++itLayout) {
503  fNet.AddLayer((*itLayout).first, (*itLayout).second);
504  }
505  fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
506 
507  //
508  // Loss function and output.
509  //
510 
511  fOutputFunction = EOutputFunction::kSigmoid;
512  if (fAnalysisType == Types::kClassification)
513  {
514  if (fErrorStrategy == "SUMOFSQUARES") {
515  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
516  }
517  if (fErrorStrategy == "CROSSENTROPY") {
518  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
519  }
520  fOutputFunction = EOutputFunction::kSigmoid;
521  } else if (fAnalysisType == Types::kRegression) {
522  if (fErrorStrategy != "SUMOFSQUARES") {
523  Log () << kWARNING << "For regression only SUMOFSQUARES is a valid "
524  << " neural net error function. Setting error function to "
525  << " SUMOFSQUARES now." << Endl;
526  }
527  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
528  fOutputFunction = EOutputFunction::kIdentity;
529  } else if (fAnalysisType == Types::kMulticlass) {
530  if (fErrorStrategy == "SUMOFSQUARES") {
531  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
532  }
533  if (fErrorStrategy == "CROSSENTROPY") {
534  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
535  }
536  if (fErrorStrategy == "MUTUALEXCLUSIVE") {
537  fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
538  }
539  fOutputFunction = EOutputFunction::kSoftmax;
540  }
541 
542  //
543  // Initialization
544  //
545 
546  if (fWeightInitializationString == "XAVIER") {
547  fWeightInitialization = DNN::EInitialization::kGauss;
548  }
549  else if (fWeightInitializationString == "XAVIERUNIFORM") {
550  fWeightInitialization = DNN::EInitialization::kUniform;
551  }
552  else {
553  fWeightInitialization = DNN::EInitialization::kGauss;
554  }
555 
556  //
557  // Training settings.
558  //
559 
560  // Force validation of the ValidationSize option
561  GetNumValidationSamples();
562 
563  KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
564  TString ("|"),
565  TString (","));
566 
567  std::cout << "Parsed training strategy string: " << fTrainingStrategyString << std::endl;
568  std::cout << "Number of training setting blocks: " << strategyKeyValues.size() << std::endl;
569  for (auto& block : strategyKeyValues) {
570  TTrainingSettings settings;
571 
572  settings.convergenceSteps = fetchValue(block, "ConvergenceSteps", 100);
573  settings.batchSize = fetchValue(block, "BatchSize", 30);
574  settings.testInterval = fetchValue(block, "TestRepetitions", 7);
575  settings.weightDecay = fetchValue(block, "WeightDecay", 0.0);
576  settings.learningRate = fetchValue(block, "LearningRate", 1e-5);
577  settings.momentum = fetchValue(block, "Momentum", 0.3);
578  settings.dropoutProbabilities = fetchValue(block, "DropConfig",
579  std::vector<Double_t>());
580 
581  TString regularization = fetchValue(block, "Regularization",
582  TString ("NONE"));
583  if (regularization == "L1") {
584  settings.regularization = DNN::ERegularization::kL1;
585  } else if (regularization == "L2") {
586  settings.regularization = DNN::ERegularization::kL2;
587  } else {
588  settings.regularization = DNN::ERegularization::kNone;
589  }
590 
591  TString strMultithreading = fetchValue(block, "Multithreading",
592  TString ("True"));
593  if (strMultithreading.BeginsWith ("T")) {
594  settings.multithreading = true;
595  } else {
596  settings.multithreading = false;
597  }
598 
599  fTrainingSettings.push_back(settings);
600  }
601 }
602 
603 ////////////////////////////////////////////////////////////////////////////////
604 /// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
605 /// 100 etc.
606 /// - 20% and 0.2 selects 20% of the training set as validation data.
607 /// - 100 selects 100 events as the validation data.
608 ///
609 /// @return number of samples in validation set
610 ///
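///
/// Worked example (illustrative numbers): with 10000 training events,
/// "ValidationSize=20%" and "ValidationSize=0.2" both give 2000 validation
/// events, while "ValidationSize=100" gives exactly 100 events.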
611 
612 UInt_t TMVA::MethodDNN::GetNumValidationSamples()
613 {
614  Int_t nValidationSamples = 0;
615  UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
616 
617  // Parsing + Validation
618  // --------------------
619  if (fValidationSize.EndsWith("%")) {
620  // Relative spec. format 20%
621  TString intValStr = TString(fValidationSize.Strip(TString::kTrailing, '%'));
622 
623  if (intValStr.IsFloat()) {
624  Double_t valSizeAsDouble = fValidationSize.Atof() / 100.0;
625  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
626  } else {
627  Log() << kFATAL << "Cannot parse number \"" << fValidationSize
628  << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
629  }
630  } else if (fValidationSize.IsFloat()) {
631  Double_t valSizeAsDouble = fValidationSize.Atof();
632 
633  if (valSizeAsDouble < 1.0) {
634  // Relative spec. format 0.2
635  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
636  } else {
637  // Absolute spec format 100 or 100.0
638  nValidationSamples = valSizeAsDouble;
639  }
640  } else {
641  Log() << kFATAL << "Cannot parse number \"" << fValidationSize << "\". Expected string like \"0.2\" or \"100\"."
642  << Endl;
643  }
644 
645  // Value validation
646  // ----------------
647  if (nValidationSamples < 0) {
648  Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is negative." << Endl;
649  }
650 
651  if (nValidationSamples == 0) {
652  Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is zero." << Endl;
653  }
654 
655  if (nValidationSamples >= (Int_t)trainingSetSize) {
656  Log() << kFATAL << "Validation size \"" << fValidationSize
657  << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
658  }
659 
660  return nValidationSamples;
661 }
662 
663 ////////////////////////////////////////////////////////////////////////////////
664 
665 void TMVA::MethodDNN::Train()
666 {
667  if (fInteractive && fInteractive->NotInitialized()){
668  std::vector<TString> titles = {"Error on training set", "Error on test set"};
669  fInteractive->Init(titles);
670  // JsMVA progress bar maximum (100%)
671  fIPyMaxIter = 100;
672  }
673 
674  for (TTrainingSettings & settings : fTrainingSettings) {
675  size_t nValidationSamples = GetNumValidationSamples();
676  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
677  size_t nTestSamples = nValidationSamples;
678 
679  if (nTrainingSamples < settings.batchSize ||
680  nValidationSamples < settings.batchSize ||
681  nTestSamples < settings.batchSize) {
682  Log() << kFATAL << "Number of samples in the datasets: train: "
683  << nTrainingSamples << " valid: " << nValidationSamples
684  << " test: " << nTestSamples << ". "
685  << "One of these is smaller than the batch size of "
686  << settings.batchSize << ". Please decrease the batch"
687  << " size so that it is not larger than the smallest"
688  << " of these values." << Endl;
689  }
690  }
691 
692  if (fArchitectureString == "GPU") {
693  TrainGpu();
694  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
695  ExitFromTraining();
696  return;
697  } else if (fArchitectureString == "OPENCL") {
698  Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
699  return;
700  } else if (fArchitectureString == "CPU") {
701  TrainCpu();
702  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
703  ExitFromTraining();
704  return;
705  }
706 
707  Log() << kINFO << "Using Standard Implementation." << Endl;
708 
709  std::vector<Pattern> trainPattern;
710  std::vector<Pattern> testPattern;
711 
712  size_t nValidationSamples = GetNumValidationSamples();
713  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
714 
715  const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
716  const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
717  const std::vector<TMVA::Event *> eventCollectionTesting{allData.begin() + nTrainingSamples, allData.end()};
718 
719  for (auto &event : eventCollectionTraining) {
720  const std::vector<Float_t>& values = event->GetValues();
721  if (fAnalysisType == Types::kClassification) {
722  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
723  trainPattern.push_back(Pattern (values.begin(),
724  values.end(),
725  outputValue,
726  event->GetWeight()));
727  trainPattern.back().addInput(1.0);
728  } else if (fAnalysisType == Types::kMulticlass) {
729  std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
730  oneHot[event->GetClass()] = 1.0;
731  trainPattern.push_back(Pattern (values.begin(), values.end(),
732  oneHot.cbegin(), oneHot.cend(),
733  event->GetWeight()));
734  trainPattern.back().addInput(1.0);
735  } else {
736  const std::vector<Float_t>& targets = event->GetTargets ();
737  trainPattern.push_back(Pattern(values.begin(),
738  values.end(),
739  targets.begin(),
740  targets.end(),
741  event->GetWeight ()));
742  trainPattern.back ().addInput (1.0); // bias node
743  }
744  }
745 
746  for (auto &event : eventCollectionTesting) {
747  const std::vector<Float_t>& values = event->GetValues();
748  if (fAnalysisType == Types::kClassification) {
749  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
750  testPattern.push_back(Pattern (values.begin(),
751  values.end(),
752  outputValue,
753  event->GetWeight()));
754  testPattern.back().addInput(1.0);
755  } else if (fAnalysisType == Types::kMulticlass) {
756  std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
757  oneHot[event->GetClass()] = 1.0;
758  testPattern.push_back(Pattern (values.begin(), values.end(),
759  oneHot.cbegin(), oneHot.cend(),
760  event->GetWeight()));
761  testPattern.back().addInput(1.0);
762  } else {
763  const std::vector<Float_t>& targets = event->GetTargets ();
764  testPattern.push_back(Pattern(values.begin(),
765  values.end(),
766  targets.begin(),
767  targets.end(),
768  event->GetWeight ()));
769  testPattern.back ().addInput (1.0); // bias node
770  }
771  }
772 
773  DNN::Net net;
774  std::vector<double> weights;
775 
776  net.SetIpythonInteractive(fInteractive, &fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);
777 
778  net.setInputSize(fNet.GetInputWidth() + 1);
779  net.setOutputSize(fNet.GetOutputWidth() + 1);
780 
781  for (size_t i = 0; i < fNet.GetDepth(); i++) {
782  EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();
783  EnumFunction g = EnumFunction::LINEAR;
784  switch(f) {
785  case EActivationFunction::kIdentity: g = EnumFunction::LINEAR; break;
786  case EActivationFunction::kRelu: g = EnumFunction::RELU; break;
787  case EActivationFunction::kSigmoid: g = EnumFunction::SIGMOID; break;
788  case EActivationFunction::kTanh: g = EnumFunction::TANH; break;
789  case EActivationFunction::kFastTanh: g = EnumFunction::TANH; break;
790  case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
791  case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
792  case EActivationFunction::kGauss: g = EnumFunction::GAUSS; break;
793  }
794  if (i < fNet.GetDepth() - 1) {
795  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
796  } else {
797  ModeOutputValues h = ModeOutputValues::DIRECT;
798  switch(fOutputFunction) {
799  case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT; break;
800  case EOutputFunction::kSigmoid: h = ModeOutputValues::SIGMOID; break;
801  case EOutputFunction::kSoftmax: h = ModeOutputValues::SOFTMAX; break;
802  }
803  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
804  }
805  }
806 
807  switch(fNet.GetLossFunction()) {
808  case ELossFunction::kMeanSquaredError:
809  net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
810  break;
811  case ELossFunction::kCrossEntropy:
812  net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
813  break;
814  case ELossFunction::kSoftmaxCrossEntropy:
815  net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
816  break;
817  }
818 
819  switch(fWeightInitialization) {
820  case DNN::EInitialization::kGauss:
821  net.initializeWeights(WeightInitializationStrategy::XAVIER,
822  std::back_inserter(weights));
823  break;
824  case DNN::EInitialization::kUniform:
825  net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
826  std::back_inserter(weights));
827  break;
828  default:
829  net.initializeWeights(WeightInitializationStrategy::XAVIER,
830  std::back_inserter(weights));
831  break;
832  }
833 
834  int idxSetting = 0;
835  for (auto s : fTrainingSettings) {
836 
837  EnumRegularization r = EnumRegularization::NONE;
838  switch(s.regularization) {
839  case ERegularization::kNone: r = EnumRegularization::NONE; break;
840  case ERegularization::kL1: r = EnumRegularization::L1; break;
841  case ERegularization::kL2: r = EnumRegularization::L2; break;
842  }
843 
844  Settings * settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
845  s.testInterval, s.weightDecay, r,
846  MinimizerType::fSteepest, s.learningRate,
847  s.momentum, 1, s.multithreading);
848  std::shared_ptr<Settings> ptrSettings(settings);
849  ptrSettings->setMonitoring (0);
850  Log() << kINFO
851  << "Training with learning rate = " << ptrSettings->learningRate ()
852  << ", momentum = " << ptrSettings->momentum ()
853  << ", repetitions = " << ptrSettings->repetitions ()
854  << Endl;
855 
856  ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
857  (idxSetting+1)*100.0/(fSettings.size ()));
858 
859  const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
860  if (!dropConfig.empty ()) {
861  Log () << kINFO << "Drop configuration" << Endl
862  << " drop repetitions = " << ptrSettings->dropRepetitions()
863  << Endl;
864  }
865 
866  int idx = 0;
867  for (auto f : dropConfig) {
868  Log () << kINFO << " Layer " << idx << " = " << f << Endl;
869  ++idx;
870  }
871  Log () << kINFO << Endl;
872 
873  DNN::Steepest minimizer(ptrSettings->learningRate(),
874  ptrSettings->momentum(),
875  ptrSettings->repetitions());
876  net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
877  ptrSettings.reset();
878  Log () << kINFO << Endl;
879  idxSetting++;
880  }
881  size_t weightIndex = 0;
882  for (size_t l = 0; l < fNet.GetDepth(); l++) {
883  auto & layerWeights = fNet.GetLayer(l).GetWeights();
884  for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
885  for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
886  layerWeights(i,j) = weights[weightIndex];
887  weightIndex++;
888  }
889  }
890  auto & layerBiases = fNet.GetLayer(l).GetBiases();
891  if (l == 0) {
892  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
893  layerBiases(i,0) = weights[weightIndex];
894  weightIndex++;
895  }
896  } else {
897  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
898  layerBiases(i,0) = 0.0;
899  }
900  }
901  }
902  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
903  ExitFromTraining();
904 }
905 
906 ////////////////////////////////////////////////////////////////////////////////
907 
908 void TMVA::MethodDNN::TrainGpu()
909 {
910 
911 #ifdef DNNCUDA // Included only if DNNCUDA flag is set.
912  Log() << kINFO << "Start of neural network training on GPU." << Endl << Endl;
913 
914  size_t nValidationSamples = GetNumValidationSamples();
915  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
916  size_t nTestSamples = nValidationSamples;
917 
918  Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
919  Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;
920 
921  size_t trainingPhase = 1;
922  fNet.Initialize(fWeightInitialization);
923  for (TTrainingSettings & settings : fTrainingSettings) {
924 
925  if (fInteractive){
926  fInteractive->ClearGraphs();
927  }
928 
929  TNet<TCuda<>> net(settings.batchSize, fNet);
930  net.SetWeightDecay(settings.weightDecay);
931  net.SetRegularization(settings.regularization);
932 
933  // Need to convert dropout probabilities to the convention used by the
934  // backend implementation, which expects the probability to keep a node (1 - p).
935  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
936  for (auto & p : dropoutVector) {
937  p = 1.0 - p;
938  }
939  net.SetDropoutProbabilities(dropoutVector);
940 
941  net.InitializeGradients();
942  auto testNet = net.CreateClone(settings.batchSize);
943 
944  Log() << kINFO << "Training phase " << trainingPhase << " of "
945  << fTrainingSettings.size() << ":" << Endl;
946  trainingPhase++;
947 
948  using DataLoader_t = TDataLoader<TMVAInput_t, TCuda<>>;
949 
950  // Split training data into training and validation set
951  const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
952  const std::vector<Event *> trainingInputData =
953  std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
954  const std::vector<Event *> testInputData =
955  std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
956 
957  if (trainingInputData.size() != nTrainingSamples) {
958  Log() << kFATAL << "Inconsistent training sample size" << Endl;
959  }
960  if (testInputData.size() != nTestSamples) {
961  Log() << kFATAL << "Inconsistent test sample size" << Endl;
962  }
963 
964  size_t nThreads = 1;
965  TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
966  TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
967  DataLoader_t trainingData(trainingTuple, nTrainingSamples,
968  net.GetBatchSize(), net.GetInputWidth(),
969  net.GetOutputWidth(), nThreads);
970  DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
971  net.GetInputWidth(), net.GetOutputWidth(),
972  nThreads);
973  DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
974  settings.convergenceSteps,
975  settings.testInterval);
976 
977  std::vector<TNet<TCuda<>>> nets{};
978  std::vector<TBatch<TCuda<>>> batches{};
979  nets.reserve(nThreads);
980  for (size_t i = 0; i < nThreads; i++) {
981  nets.push_back(net);
982  for (size_t j = 0; j < net.GetDepth(); j++)
983  {
984  auto &masterLayer = net.GetLayer(j);
985  auto &layer = nets.back().GetLayer(j);
986  TCuda<>::Copy(layer.GetWeights(),
987  masterLayer.GetWeights());
988  TCuda<>::Copy(layer.GetBiases(),
989  masterLayer.GetBiases());
990  }
991  }
992 
993  bool converged = false;
994  size_t stepCount = 0;
995  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
996 
997  std::chrono::time_point<std::chrono::system_clock> start, end;
998  start = std::chrono::system_clock::now();
999 
1000  if (!fInteractive) {
1001  Log() << std::setw(10) << "Epoch" << " | "
1002  << std::setw(12) << "Train Err."
1003  << std::setw(12) << "Test Err."
1004  << std::setw(12) << "GFLOP/s"
1005  << std::setw(12) << "Conv. Steps" << Endl;
1006  std::string separator(62, '-');
1007  Log() << separator << Endl;
1008  }
1009 
1010  while (!converged)
1011  {
1012  stepCount++;
1013 
1014  // Perform minimization steps for a full epoch.
1015  trainingData.Shuffle();
1016  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1017  batches.clear();
1018  for (size_t j = 0; j < nThreads; j++) {
1019  batches.reserve(nThreads);
1020  batches.push_back(trainingData.GetBatch());
1021  }
1022  if (settings.momentum > 0.0) {
1023  minimizer.StepMomentum(net, nets, batches, settings.momentum);
1024  } else {
1025  minimizer.Step(net, nets, batches);
1026  }
1027  }
1028 
1029  if ((stepCount % minimizer.GetTestInterval()) == 0) {
1030 
1031  // Compute test error.
1032  Double_t testError = 0.0;
1033  for (auto batch : testData) {
1034  auto inputMatrix = batch.GetInput();
1035  auto outputMatrix = batch.GetOutput();
1036  testError += testNet.Loss(inputMatrix, outputMatrix);
1037  }
1038  testError /= (Double_t) (nTestSamples / settings.batchSize);
1039 
1040  //Log the loss value
1041  fTrainHistory.AddValue("testError",stepCount,testError);
1042 
1043  end = std::chrono::system_clock::now();
1044 
1045  // Compute training error.
1046  Double_t trainingError = 0.0;
1047  for (auto batch : trainingData) {
1048  auto inputMatrix = batch.GetInput();
1049  auto outputMatrix = batch.GetOutput();
1050  trainingError += net.Loss(inputMatrix, outputMatrix);
1051  }
1052  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1053  //Log the loss value
1054  fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1055 
1056  // Compute numerical throughput.
1057  std::chrono::duration<double> elapsed_seconds = end - start;
1058  double seconds = elapsed_seconds.count();
1059  double nFlops = (double) (settings.testInterval * batchesInEpoch);
1060  nFlops *= net.GetNFlops() * 1e-9;
1061 
1062  converged = minimizer.HasConverged(testError);
1063  start = std::chrono::system_clock::now();
1064 
1065  if (fInteractive) {
1066  fInteractive->AddPoint(stepCount, trainingError, testError);
1067  fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1068  / minimizer.GetConvergenceSteps ();
1069  if (fExitFromTraining) break;
1070  } else {
1071  Log() << std::setw(10) << stepCount << " | "
1072  << std::setw(12) << trainingError
1073  << std::setw(12) << testError
1074  << std::setw(12) << nFlops / seconds
1075  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1076  if (converged) {
1077  Log() << Endl;
1078  }
1079  }
1080  }
1081  }
1082  for (size_t l = 0; l < net.GetDepth(); l++) {
1083  fNet.GetLayer(l).GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1084  fNet.GetLayer(l).GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1085  }
1086  }
1087 
1088 #else // DNNCUDA flag not set.
1089 
1090  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1091  "you have CUDA installed and that it was successfully "
1092  "detected by CMake." << Endl;
1093 #endif // DNNCUDA
1094 }
1095 
1096 ////////////////////////////////////////////////////////////////////////////////
1097 
1098 void TMVA::MethodDNN::TrainCpu()
1099 {
1100 
1101 #ifdef DNNCPU // Included only if DNNCPU flag is set.
1102  Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;
1103 
1104  size_t nValidationSamples = GetNumValidationSamples();
1105  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1106  size_t nTestSamples = nValidationSamples;
1107 
1108  Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
1109  Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;
1110 
1111  fNet.Initialize(fWeightInitialization);
1112 
1113  size_t trainingPhase = 1;
1114  for (TTrainingSettings & settings : fTrainingSettings) {
1115 
1116  if (fInteractive){
1117  fInteractive->ClearGraphs();
1118  }
1119 
1120  Log() << "Training phase " << trainingPhase << " of "
1121  << fTrainingSettings.size() << ":" << Endl;
1122  trainingPhase++;
1123 
1124  TNet<TCpu<>> net(settings.batchSize, fNet);
1125  net.SetWeightDecay(settings.weightDecay);
1126  net.SetRegularization(settings.regularization);
1127  // Need to convert dropout probabilities to the convention used by the
1128  // backend implementation, which expects the probability to keep a node (1 - p).
1129  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1130  for (auto & p : dropoutVector) {
1131  p = 1.0 - p;
1132  }
1133  net.SetDropoutProbabilities(dropoutVector);
1134  net.InitializeGradients();
1135  auto testNet = net.CreateClone(settings.batchSize);
1136 
1137  using DataLoader_t = TDataLoader<TMVAInput_t, TCpu<>>;
1138 
1139  // Split training data into training and validation set
1140  const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
1141  const std::vector<Event *> trainingInputData =
1142  std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
1143  const std::vector<Event *> testInputData =
1144  std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
1145 
1146  if (trainingInputData.size() != nTrainingSamples) {
1147  Log() << kFATAL << "Inconsistent training sample size" << Endl;
1148  }
1149  if (testInputData.size() != nTestSamples) {
1150  Log() << kFATAL << "Inconsistent test sample size" << Endl;
1151  }
1152 
1153  size_t nThreads = 1;
1154  TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
1155  TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
1156  DataLoader_t trainingData(trainingTuple, nTrainingSamples,
1157  net.GetBatchSize(), net.GetInputWidth(),
1158  net.GetOutputWidth(), nThreads);
1159  DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
1160  net.GetInputWidth(), net.GetOutputWidth(),
1161  nThreads);
1162  DNN::TGradientDescent<TCpu<>> minimizer(settings.learningRate,
1163  settings.convergenceSteps,
1164  settings.testInterval);
1165 
1166  std::vector<TNet<TCpu<>>> nets{};
1167  std::vector<TBatch<TCpu<>>> batches{};
1168  nets.reserve(nThreads);
1169  for (size_t i = 0; i < nThreads; i++) {
1170  nets.push_back(net);
1171  for (size_t j = 0; j < net.GetDepth(); j++)
1172  {
1173  auto &masterLayer = net.GetLayer(j);
1174  auto &layer = nets.back().GetLayer(j);
1175  TCpu<>::Copy(layer.GetWeights(),
1176  masterLayer.GetWeights());
1177  TCpu<>::Copy(layer.GetBiases(),
1178  masterLayer.GetBiases());
1179  }
1180  }
1181 
1182  bool converged = false;
1183  size_t stepCount = 0;
1184  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
1185 
1186  std::chrono::time_point<std::chrono::system_clock> start, end;
1187  start = std::chrono::system_clock::now();
1188 
1189  if (!fInteractive) {
1190  Log() << std::setw(10) << "Epoch" << " | "
1191  << std::setw(12) << "Train Err."
1192  << std::setw(12) << "Test Err."
1193  << std::setw(12) << "GFLOP/s"
1194  << std::setw(12) << "Conv. Steps" << Endl;
1195  std::string separator(62, '-');
1196  Log() << separator << Endl;
1197  }
1198 
1199  while (!converged)
1200  {
1201  stepCount++;
1202  // Perform minimization steps for a full epoch.
1203  trainingData.Shuffle();
1204  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1205  batches.clear();
1206  for (size_t j = 0; j < nThreads; j++) {
1207  batches.reserve(nThreads);
1208  batches.push_back(trainingData.GetBatch());
1209  }
1210  if (settings.momentum > 0.0) {
1211  minimizer.StepMomentum(net, nets, batches, settings.momentum);
1212  } else {
1213  minimizer.Step(net, nets, batches);
1214  }
1215  }
1216 
1217  if ((stepCount % minimizer.GetTestInterval()) == 0) {
1218 
1219  // Compute test error.
1220  Double_t testError = 0.0;
1221  for (auto batch : testData) {
1222  auto inputMatrix = batch.GetInput();
1223  auto outputMatrix = batch.GetOutput();
1224  auto weightMatrix = batch.GetWeights();
1225  testError += testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
1226  }
1227  testError /= (Double_t) (nTestSamples / settings.batchSize);
1228 
1229  //Log the loss value
1230  fTrainHistory.AddValue("testError",stepCount,testError);
1231 
1232  end = std::chrono::system_clock::now();
1233 
1234  // Compute training error.
1235  Double_t trainingError = 0.0;
1236  for (auto batch : trainingData) {
1237  auto inputMatrix = batch.GetInput();
1238  auto outputMatrix = batch.GetOutput();
1239  auto weightMatrix = batch.GetWeights();
1240  trainingError += net.Loss(inputMatrix, outputMatrix, weightMatrix);
1241  }
1242  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1243 
1244  //Log the loss value
1245  fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1246 
1247  if (fInteractive){
1248  fInteractive->AddPoint(stepCount, trainingError, testError);
1249  fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
1250  if (fExitFromTraining) break;
1251  }
1252 
1253  // Compute numerical throughput.
1254  std::chrono::duration<double> elapsed_seconds = end - start;
1255  double seconds = elapsed_seconds.count();
1256  double nFlops = (double) (settings.testInterval * batchesInEpoch);
1257  nFlops *= net.GetNFlops() * 1e-9;
1258 
1259  converged = minimizer.HasConverged(testError);
1260  start = std::chrono::system_clock::now();
1261 
1262  if (fInteractive) {
1263  fInteractive->AddPoint(stepCount, trainingError, testError);
1264  fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1265  / minimizer.GetConvergenceSteps ();
1266  if (fExitFromTraining) break;
1267  } else {
1268  Log() << std::setw(10) << stepCount << " | "
1269  << std::setw(12) << trainingError
1270  << std::setw(12) << testError
1271  << std::setw(12) << nFlops / seconds
1272  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1273  if (converged) {
1274  Log() << Endl;
1275  }
1276  }
1277  }
1278  }
1279 
1280 
1281  for (size_t l = 0; l < net.GetDepth(); l++) {
1282  auto & layer = fNet.GetLayer(l);
1283  layer.GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1284  layer.GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1285  }
1286  }
1287 
1288 #else // DNNCPU flag not set.
1289  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1290  "you have a BLAS implementation and that it was successfully "
1291  "detected by CMake, and that the imt CMake flag is set." << Endl;
1292 #endif // DNNCPU
1293 }
1294 
1295 ////////////////////////////////////////////////////////////////////////////////
1296 
1297 Double_t TMVA::MethodDNN::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1298 {
1299  size_t nVariables = GetEvent()->GetNVariables();
1300  Matrix_t X(1, nVariables);
1301  Matrix_t YHat(1, 1);
1302 
1303  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1304  for (size_t i = 0; i < nVariables; i++) {
1305  X(0,i) = inputValues[i];
1306  }
1307 
1308  fNet.Prediction(YHat, X, fOutputFunction);
1309  return YHat(0,0);
1310 }
1311 
1312 ////////////////////////////////////////////////////////////////////////////////
1313 
1314 const std::vector<Float_t> & TMVA::MethodDNN::GetRegressionValues()
1315 {
1316  size_t nVariables = GetEvent()->GetNVariables();
1317  Matrix_t X(1, nVariables);
1318 
1319  const Event *ev = GetEvent();
1320  const std::vector<Float_t>& inputValues = ev->GetValues();
1321  for (size_t i = 0; i < nVariables; i++) {
1322  X(0,i) = inputValues[i];
1323  }
1324 
1325  size_t nTargets = std::max(1u, ev->GetNTargets());
1326  Matrix_t YHat(1, nTargets);
1327  std::vector<Float_t> output(nTargets);
1328  auto net = fNet.CreateClone(1);
1329  net.Prediction(YHat, X, fOutputFunction);
1330 
1331  for (size_t i = 0; i < nTargets; i++)
1332  output[i] = YHat(0, i);
1333 
1334  if (fRegressionReturnVal == NULL) {
1335  fRegressionReturnVal = new std::vector<Float_t>();
1336  }
1337  fRegressionReturnVal->clear();
1338 
1339  Event * evT = new Event(*ev);
1340  for (size_t i = 0; i < nTargets; ++i) {
1341  evT->SetTarget(i, output[i]);
1342  }
1343 
1344  const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1345  for (size_t i = 0; i < nTargets; ++i) {
1346  fRegressionReturnVal->push_back(evT2->GetTarget(i));
1347  }
1348  delete evT;
1349  return *fRegressionReturnVal;
1350 }
1351 
1352 const std::vector<Float_t> & TMVA::MethodDNN::GetMulticlassValues()
1353 {
1354  size_t nVariables = GetEvent()->GetNVariables();
1355  Matrix_t X(1, nVariables);
1356  Matrix_t YHat(1, DataInfo().GetNClasses());
1357  if (fMulticlassReturnVal == NULL) {
1358  fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1359  }
1360 
1361  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1362  for (size_t i = 0; i < nVariables; i++) {
1363  X(0,i) = inputValues[i];
1364  }
1365 
1366  fNet.Prediction(YHat, X, fOutputFunction);
1367  for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1368  (*fMulticlassReturnVal)[i] = YHat(0, i);
1369  }
1370  return *fMulticlassReturnVal;
1371 }
1372 
1373 ////////////////////////////////////////////////////////////////////////////////
1374 
1375 void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const
1376 {
1377  void* nn = gTools().xmlengine().NewChild(parent, 0, "Weights");
1378  Int_t inputWidth = fNet.GetInputWidth();
1379  Int_t depth = fNet.GetDepth();
1380  char lossFunction = static_cast<char>(fNet.GetLossFunction());
1381  gTools().xmlengine().NewAttr(nn, 0, "InputWidth",
1382  gTools().StringFromInt(inputWidth));
1383  gTools().xmlengine().NewAttr(nn, 0, "Depth", gTools().StringFromInt(depth));
1384  gTools().xmlengine().NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1385  gTools().xmlengine().NewAttr(nn, 0, "OutputFunction",
1386  TString(static_cast<char>(fOutputFunction)));
1387 
1388  for (Int_t i = 0; i < depth; i++) {
1389  const auto& layer = fNet.GetLayer(i);
1390  auto layerxml = gTools().xmlengine().NewChild(nn, 0, "Layer");
1391  int activationFunction = static_cast<int>(layer.GetActivationFunction());
1392  gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
1393  TString::Itoa(activationFunction, 10));
1394  WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
1395  WriteMatrixXML(layerxml, "Biases", layer.GetBiases());
1396  }
1397 }
1398 
1399 ////////////////////////////////////////////////////////////////////////////////
1400 
1401 void TMVA::MethodDNN::ReadWeightsFromXML(void* rootXML)
1402 {
1403  auto netXML = gTools().GetChild(rootXML, "Weights");
1404  if (!netXML){
1405  netXML = rootXML;
1406  }
1407 
1408  fNet.Clear();
1409  fNet.SetBatchSize(1);
1410 
1411  size_t inputWidth, depth;
1412  gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1413  gTools().ReadAttr(netXML, "Depth", depth);
1414  char lossFunctionChar;
1415  gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1416  char outputFunctionChar;
1417  gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1418 
1419  fNet.SetInputWidth(inputWidth);
1420  fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
1421  fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1422 
1423  size_t previousWidth = inputWidth;
1424  auto layerXML = gTools().xmlengine().GetChild(netXML, "Layer");
1425  for (size_t i = 0; i < depth; i++) {
1426  TString fString;
1427  EActivationFunction f;
1428 
1429  // Read activation function.
1430  gTools().ReadAttr(layerXML, "ActivationFunction", fString);
1431  f = static_cast<EActivationFunction>(fString.Atoi());
1432 
1433  // Read number of neurons.
1434  size_t width;
1435  auto matrixXML = gTools().GetChild(layerXML, "Weights");
1436  gTools().ReadAttr(matrixXML, "rows", width);
1437 
1438  fNet.AddLayer(width, f);
1439  TMatrixT<Double_t> weights(width, previousWidth);
1440  TMatrixT<Double_t> biases(width, 1);
1441  ReadMatrixXML(layerXML, "Weights", weights);
1442  ReadMatrixXML(layerXML, "Biases", biases);
1443  fNet.GetLayer(i).GetWeights() = weights;
1444  fNet.GetLayer(i).GetBiases() = biases;
1445 
1446  layerXML = gTools().GetNextChild(layerXML);
1447  previousWidth = width;
1448  }
1449 }
1450 
1451 ////////////////////////////////////////////////////////////////////////////////
1452 
1453 void TMVA::MethodDNN::ReadWeightsFromStream( std::istream & /*istr*/)
1454 {
1455 }
1456 
1457 ////////////////////////////////////////////////////////////////////////////////
1458 
1459 const TMVA::Ranking* TMVA::MethodDNN::CreateRanking()
1460 {
1461  fRanking = new Ranking( GetName(), "Importance" );
1462  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1463  fRanking->AddRank( Rank( GetInputLabel(ivar), 1.0));
1464  }
1465  return fRanking;
1466 }
1467 
1468 ////////////////////////////////////////////////////////////////////////////////
1469 
1470 void TMVA::MethodDNN::MakeClassSpecific( std::ostream& /*fout*/,
1471  const TString& /*className*/ ) const
1472 {
1473 }
1474 
1475 ////////////////////////////////////////////////////////////////////////////////
1476 
1477 void TMVA::MethodDNN::GetHelpMessage() const
1478 {
1479  // get help message text
1480  //
1481  // typical length of text line:
1482  // "|--------------------------------------------------------------|"
1483  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1484  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1485 
1486  Log() << Endl;
1487  Log() << col << "--- Short description:" << colres << Endl;
1488  Log() << Endl;
1489  Log() << "The DNN neural network is a feedforward" << Endl;
1490  Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
1491  Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
1492  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1493  Log() << "signal and one background, regression or multiclass). " << Endl;
1494  Log() << Endl;
1495  Log() << col << "--- Performance optimisation:" << colres << Endl;
1496  Log() << Endl;
1497 
1498  const char* txt = "The DNN supports various options to improve performance in terms of training speed and \n \
1499 reduction of overfitting: \n \
1500 \n \
1501  - different training settings can be stacked, such that the initial training \n\
1502  is done with a large learning rate and a large dropout fraction, while \n \
1503  in a later stage the learning rate and the dropout fraction can be reduced. \n \
1504  - drop out \n \
1505  [recommended: \n \
1506  initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
1507  later training stage: 0.1 or 0.0 for all layers \n \
1508  final training stage: 0.0] \n \
1509  Drop out is a technique where at each training cycle a fraction of randomly chosen \n \
1510  nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
1511  - L1 and L2 regularization are available \n \
1512  - Minibatches \n \
1513  [recommended 10 - 150] \n \
1514  Arbitrary mini-batch sizes can be chosen. \n \
1515  - Multithreading \n \
1516  [recommended: True] \n \
1517  Multithreading can be turned on. The minibatches are distributed to the available \n \
1518  cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
1519  \n \
1520  Options: \n \
1521  \"Layout\": \n \
1522  - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
1523  - meaning: \n \
1524  . two hidden layers (separated by \",\") \n \
1525  . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
1526  . the activation function for the output layer is LINEAR \n \
1527  . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
1528  . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
1529  . the number of nodes in the output layer is determined by the number of output nodes \n \
1530  and can therefore not be chosen freely. \n \
1531  \n \
1532  \"ErrorStrategy\": \n \
1533  - SUMOFSQUARES \n \
1534  The error of the neural net is determined by a sum-of-squares error function \n \
1535  For regression, this is the only possible choice. \n \
1536  - CROSSENTROPY \n \
1537  The error of the neural net is determined by a cross entropy function. The \n \
1538  output values are automatically (internally) transformed into probabilities \n \
1539  using a sigmoid function. \n \
1540  For signal/background classification this is the default choice. \n \
1541  For multiclass using cross entropy more than one or no output classes \n \
1542  can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n \
1543  A and C are true, Event 2: C is true, ...) \n \
1544  - MUTUALEXCLUSIVE \n \
1545  In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
1546  \n \
1547  \"WeightInitialization\" \n \
1548  - XAVIER \n \
1549  [recommended] \n \
1550  \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
1551  such that the variance of the values of the nodes is preserved for each layer. \n \
1552  - XAVIERUNIFORM \n \
1553  The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
1554  - LAYERSIZE \n \
1555  Random values scaled by the layer size \n \
1556  \n \
1557  \"TrainingStrategy\" \n \
1558  - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1559  - explanation: two stacked training settings separated by \"|\" \n \
1560  . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1561  . second training setting: \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1562  . LearningRate : \n \
1563  - recommended for classification: 0.1 initially, 1e-4 later \n \
1564  - recommended for regression: 1e-4 and less \n \
1565  . Momentum : \n \
1566  preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
1567  . Repetitions : \n \
1568  train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
1569  . ConvergenceSteps : \n \
1570  Convergence is assumed once no improvement of the error on the test sample has \n \
1571  been found for \"ConvergenceSteps\" cycles. (Note that the test sample is only \n \
1572  evaluated, and convergence therefore only checked, every \"TestRepetitions\" cycles.) \n \
1573  . BatchSize \n \
1574  Size of the mini-batches. \n \
1575  . TestRepetitions \n \
1576  Evaluate the neural net on the test sample every \"TestRepetitions\" cycles \n \
1577  . WeightDecay \n \
1578  If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
1579  . Renormalize \n \
1580  NONE, L1 (|w|) or L2 (w^2) \n \
1581  . DropConfig \n \
1582  Drop a fraction of randomly chosen nodes in each layer according to the values given \n \
1583  in the DropConfig. \n \
1584  [example: DropConfig=0.0+0.5+0.3 \n \
1585  meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
1586  in layer 2 \n \
1587  recommended: leave all the nodes turned on for the input layer (layer 0) \n \
1588  turn off half of the nodes in later layers for the initial training; leave all nodes \n \
1589  turned on (0.0) in later training stages] \n \
1590  . DropRepetitions \n \
1591  Every \"DropRepetitions\" cycles, the set of nodes that are dropped is changed \n \
1592  [recommended : 1] \n \
1593  . Multithreading \n \
1594  turn on multithreading [recommended: True] \n \
1595  \n";
1596  Log () << txt << Endl;
1597 }
1598 
1599 } // namespace TMVA
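The option-string grammar described in the help message above is easiest to see in a complete booking call. The following macro is an illustrative sketch only and is not part of MethodDNN.cxx: the input file, tree names ("TreeS", "TreeB") and variables ("var1", "var2") are placeholders for a user dataset, and the Architecture=CPU token assumes a ROOT build with the multi-core CPU backend enabled.

// Illustrative sketch: book a TMVA DNN using the Layout / ErrorStrategy /
// WeightInitialization / TrainingStrategy options documented above.
// File, tree and variable names are placeholders for a user dataset.
#include "TFile.h"
#include "TTree.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

void BookDNNExample()
{
   TFile *input  = TFile::Open("input.root");                // placeholder input file
   TFile *output = TFile::Open("TMVA_DNN.root", "RECREATE");

   auto *factory = new TMVA::Factory("TMVAClassification", output,
                                     "!V:!Silent:AnalysisType=Classification");
   auto *loader  = new TMVA::DataLoader("dataset");

   loader->AddVariable("var1", 'F');                         // placeholder variables
   loader->AddVariable("var2", 'F');
   loader->AddSignalTree((TTree *)input->Get("TreeS"), 1.0);
   loader->AddBackgroundTree((TTree *)input->Get("TreeB"), 1.0);
   loader->PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents:!V");

   // Two TANH hidden layers and a LINEAR output layer, as in the Layout example above.
   TString layout = "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR";

   // Two stacked training phases separated by '|': a coarse phase with a large
   // learning rate and dropout in the hidden layers, then a fine phase without dropout.
   TString training =
      "TrainingStrategy="
      "LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,"
      "WeightDecay=0.0,Renormalize=L2,DropConfig=0.0+0.5+0.5,DropRepetitions=1,Multithreading=True|"
      "LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,"
      "WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=1,Multithreading=True";

   TString options = "!H:!V:ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:"
                     "Architecture=CPU:" + layout + ":" + training;

   factory->BookMethod(loader, TMVA::Types::kDNN, "DNN", options);
   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();

   output->Close();
   delete factory;
   delete loader;
}

The two training phases follow the recommendation in the help text: start with a learning rate of 0.1 and dropout in the hidden layers, then continue with 1e-4 and all nodes active.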