MethodDNN.cxx
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDNN *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A neural network implementation *
12  * *
13  * Authors (alphabetical): *
14  * Simon Pfreundschuh <s.pfreundschuh@gmail.com> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@gmx.ch> - CERN, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 /*! \class TMVA::MethodDNN
29 \ingroup TMVA
30 Deep Neural Network Implementation.
31 */
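// Example usage (editor's illustrative sketch, not part of the original
// documentation): the method is typically booked through the TMVA Factory.
// The pointers `factory` and `dataloader` and all option values below are
// assumptions chosen for illustration; the option names (Layout,
// ErrorStrategy, WeightInitialization, Architecture, TrainingStrategy) are
// the ones declared in DeclareOptions() further down in this file.
//
//    factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN",
//       "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR:"
//       "ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:"
//       "Architecture=CPU:"
//       "TrainingStrategy=LearningRate=1e-2,Momentum=0.9,ConvergenceSteps=20,"
//       "BatchSize=100,TestRepetitions=1");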
32 
33 #include "TMVA/MethodDNN.h"
34 
35 #include "TString.h"
36 #include "TTree.h"
37 #include "TFile.h"
38 #include "TFormula.h"
39 
40 #include "TMVA/ClassifierFactory.h"
41 #include "TMVA/Configurable.h"
42 #include "TMVA/IMethod.h"
43 #include "TMVA/MsgLogger.h"
44 #include "TMVA/MethodBase.h"
45 #include "TMVA/Timer.h"
46 #include "TMVA/Types.h"
47 #include "TMVA/Tools.h"
48 #include "TMVA/Config.h"
49 #include "TMVA/Ranking.h"
50 
51 #include "TMVA/DNN/Net.h"
53 
54 #include "TMVA/NeuralNet.h"
55 #include "TMVA/Monitoring.h"
56 
57 #include <algorithm>
58 #include <iostream>
59 #include <string>
60 #include <iomanip>
61 
62 REGISTER_METHOD(DNN)
63 
64 ClassImp(TMVA::MethodDNN)
65 
66 namespace TMVA
67 {
68  using namespace DNN;
69 
70  ////////////////////////////////////////////////////////////////////////////////
71  /// standard constructor
72 
73  TMVA::MethodDNN::MethodDNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData,
74  const TString &theOption)
75  : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption), fWeightInitialization(), fOutputFunction(),
76  fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
77  fArchitectureString(), fTrainingSettings(), fResume(false), fSettings()
78  {
79 }
80 
81 ////////////////////////////////////////////////////////////////////////////////
82 /// constructor from a weight file
83 
84 TMVA::MethodDNN::MethodDNN(DataSetInfo& theData,
85  const TString& theWeightFile)
86  : MethodBase( Types::kDNN, theData, theWeightFile),
87  fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
88  fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
89  fTrainingSettings(), fResume(false), fSettings()
90 {
91  fWeightInitialization = DNN::EInitialization::kGauss;
92  fOutputFunction = DNN::EOutputFunction::kSigmoid;
93 }
94 
95 ////////////////////////////////////////////////////////////////////////////////
96 /// destructor
97 
99 {
100  fWeightInitialization = DNN::EInitialization::kGauss;
101  fOutputFunction = DNN::EOutputFunction::kSigmoid;
102 }
103 
104 ////////////////////////////////////////////////////////////////////////////////
105 /// The DNN can handle binary classification (two classes), multiclass
106 /// classification, and regression.
107 
108 Bool_t TMVA::MethodDNN::HasAnalysisType(Types::EAnalysisType type,
109  UInt_t numberClasses,
110  UInt_t /*numberTargets*/ )
111 {
112  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
113  if (type == Types::kMulticlass ) return kTRUE;
114  if (type == Types::kRegression ) return kTRUE;
115 
116  return kFALSE;
117 }
118 
119 ////////////////////////////////////////////////////////////////////////////////
120 /// default initializations
121 
122 void TMVA::MethodDNN::Init()
123 {
124  // TODO: Remove once weights are considered by the method.
125  auto &dsi = this->DataInfo();
126  auto numClasses = dsi.GetNClasses();
127  for (UInt_t i = 0; i < numClasses; ++i) {
128  if (dsi.GetWeightExpression(i) != TString("")) {
129  Log() << kERROR << "Currently event weights are not considered properly by this method." << Endl;
130  break;
131  }
132  }
133 }
134 
135 ////////////////////////////////////////////////////////////////////////////////
136 /// Options to be set in the option string:
137 ///
138 /// - LearningRate <float> DNN learning rate parameter.
139 /// - DecayRate <float> Decay rate for learning parameter.
140 /// - TestRate <int> Period of validation set error computation.
141 /// - BatchSize <int> Number of events per batch.
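///
/// An illustrative full option string (editor's example; the values are
/// placeholders, while the option names Layout, ErrorStrategy,
/// WeightInitialization and Architecture are the ones declared below):
///
///     Layout=TANH|128,LINEAR:ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:Architecture=CPU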
142 
143 void TMVA::MethodDNN::DeclareOptions()
144 {
145 
146  DeclareOptionRef(fLayoutString="SOFTSIGN|(N+100)*2,LINEAR",
147  "Layout",
148  "Layout of the network.");
149 
150  DeclareOptionRef(fErrorStrategy="CROSSENTROPY",
151  "ErrorStrategy",
152  "Loss function: Mean squared error (regression)"
153  " or cross entropy (binary classification).");
154  AddPreDefVal(TString("CROSSENTROPY"));
155  AddPreDefVal(TString("SUMOFSQUARES"));
156  AddPreDefVal(TString("MUTUALEXCLUSIVE"));
157 
158  DeclareOptionRef(fWeightInitializationString="XAVIER",
159  "WeightInitialization",
160  "Weight initialization strategy");
161  AddPreDefVal(TString("XAVIER"));
162  AddPreDefVal(TString("XAVIERUNIFORM"));
163 
164  DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
165  AddPreDefVal(TString("STANDARD"));
166  AddPreDefVal(TString("CPU"));
167  AddPreDefVal(TString("GPU"));
168  AddPreDefVal(TString("OPENCL"));
169 
170  DeclareOptionRef(
171  fTrainingStrategyString = "LearningRate=1e-1,"
172  "Momentum=0.3,"
173  "Repetitions=3,"
174  "ConvergenceSteps=50,"
175  "BatchSize=30,"
176  "TestRepetitions=7,"
177  "WeightDecay=0.0,"
178  "Renormalize=L2,"
179  "DropConfig=0.0,"
180  "DropRepetitions=5|LearningRate=1e-4,"
181  "Momentum=0.3,"
182  "Repetitions=3,"
183  "ConvergenceSteps=50,"
184  "BatchSize=20,"
185  "TestRepetitions=7,"
186  "WeightDecay=0.001,"
187  "Renormalize=L2,"
188  "DropConfig=0.0+0.5+0.5,"
189  "DropRepetitions=5,"
190  "Multithreading=True",
191  "TrainingStrategy",
192  "Defines the training strategies.");
193 }
194 
195 ////////////////////////////////////////////////////////////////////////////////
196 /// Parse the layout specification string and return a vector with one entry
197 /// per layer, each holding the number of neurons and the activation function.
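/// Example (editor's illustration): with N denoting the number of input
/// variables, the string "TANH|(N+30)*2,TANH|(N+30),LINEAR" is parsed into
/// three entries: two tanh layers whose widths are evaluated via TFormula
/// after substituting N, followed by a linear (identity) output layer.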
198 
199 auto TMVA::MethodDNN::ParseLayoutString(TString layoutString)
200  -> LayoutVector_t
201 {
202  LayoutVector_t layout;
203  const TString layerDelimiter(",");
204  const TString subDelimiter("|");
205 
206  const size_t inputSize = GetNvar();
207 
208  TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
209  TIter nextLayer (layerStrings);
210  TObjString* layerString = (TObjString*)nextLayer ();
211 
212  for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
213  int numNodes = 0;
214  EActivationFunction activationFunction = EActivationFunction::kTanh;
215 
216  TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
217  TIter nextToken (subStrings);
218  TObjString* token = (TObjString *) nextToken();
219  int idxToken = 0;
220  for (; token != nullptr; token = (TObjString *) nextToken()) {
221  switch (idxToken)
222  {
223  case 0:
224  {
225  TString strActFnc (token->GetString ());
226  if (strActFnc == "RELU") {
227  activationFunction = DNN::EActivationFunction::kRelu;
228  } else if (strActFnc == "TANH") {
229  activationFunction = DNN::EActivationFunction::kTanh;
230  } else if (strActFnc == "SYMMRELU") {
231  activationFunction = DNN::EActivationFunction::kSymmRelu;
232  } else if (strActFnc == "SOFTSIGN") {
233  activationFunction = DNN::EActivationFunction::kSoftSign;
234  } else if (strActFnc == "SIGMOID") {
235  activationFunction = DNN::EActivationFunction::kSigmoid;
236  } else if (strActFnc == "LINEAR") {
237  activationFunction = DNN::EActivationFunction::kIdentity;
238  } else if (strActFnc == "GAUSS") {
239  activationFunction = DNN::EActivationFunction::kGauss;
240  }
241  }
242  break;
243  case 1: // number of nodes
244  {
245  TString strNumNodes (token->GetString ());
246  TString strN ("x");
247  strNumNodes.ReplaceAll ("N", strN);
248  strNumNodes.ReplaceAll ("n", strN);
249  TFormula fml ("tmp",strNumNodes);
250  numNodes = fml.Eval (inputSize);
251  }
252  break;
253  }
254  ++idxToken;
255  }
256  layout.push_back(std::make_pair(numNodes, activationFunction));
257  }
258  return layout;
259 }
260 
261 ////////////////////////////////////////////////////////////////////////////////
262 /// Parse blocks of key-value pairs and return a vector of blocks, each block
263 /// being a map of key-value pairs.
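/// Example (editor's illustration): with blockDelim = "|" and tokenDelim = ",",
/// the string "LearningRate=1e-1,BatchSize=30|LearningRate=1e-4,BatchSize=20"
/// yields two blocks, each mapping the upper-cased keys LEARNINGRATE and
/// BATCHSIZE to their respective values.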
263 
264 auto TMVA::MethodDNN::ParseKeyValueString(TString parseString,
265  TString blockDelim,
266  TString tokenDelim)
267  -> KeyValueVector_t
268 {
269  KeyValueVector_t blockKeyValues;
270  const TString keyValueDelim ("=");
271 
272  TObjArray* blockStrings = parseString.Tokenize (blockDelim);
273  TIter nextBlock (blockStrings);
274  TObjString* blockString = (TObjString *) nextBlock();
275 
276  for (; blockString != nullptr; blockString = (TObjString *) nextBlock())
277  {
278  blockKeyValues.push_back (std::map<TString,TString>());
279  std::map<TString,TString>& currentBlock = blockKeyValues.back ();
280 
281  TObjArray* subStrings = blockString->GetString ().Tokenize (tokenDelim);
282  TIter nextToken (subStrings);
283  TObjString* token = (TObjString*)nextToken ();
284 
285  for (; token != nullptr; token = (TObjString *)nextToken())
286  {
287  TString strKeyValue (token->GetString ());
288  int delimPos = strKeyValue.First (keyValueDelim.Data ());
289  if (delimPos <= 0)
290  continue;
291 
292  TString strKey = TString (strKeyValue (0, delimPos));
293  strKey.ToUpper();
294  TString strValue = TString (strKeyValue (delimPos+1, strKeyValue.Length ()));
295 
296  strKey.Strip (TString::kBoth, ' ');
297  strValue.Strip (TString::kBoth, ' ');
298 
299  currentBlock.insert (std::make_pair (strKey, strValue));
300  }
301  }
302  return blockKeyValues;
303 }
304 
305 ////////////////////////////////////////////////////////////////////////////////
306 
307 TString fetchValue (const std::map<TString, TString>& keyValueMap, TString key)
308 {
309  key.ToUpper ();
310  std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
311  if (it == keyValueMap.end()) {
312  return TString ("");
313  }
314  return it->second;
315 }
316 
317 ////////////////////////////////////////////////////////////////////////////////
318 
319 template <typename T>
320 T fetchValue(const std::map<TString,TString>& keyValueMap,
321  TString key,
322  T defaultValue);
323 
324 ////////////////////////////////////////////////////////////////////////////////
325 
326 template <>
327 int fetchValue(const std::map<TString,TString>& keyValueMap,
328  TString key,
329  int defaultValue)
330 {
331  TString value (fetchValue (keyValueMap, key));
332  if (value == "") {
333  return defaultValue;
334  }
335  return value.Atoi ();
336 }
337 
338 ////////////////////////////////////////////////////////////////////////////////
339 
340 template <>
341 double fetchValue (const std::map<TString,TString>& keyValueMap,
342  TString key, double defaultValue)
343 {
344  TString value (fetchValue (keyValueMap, key));
345  if (value == "") {
346  return defaultValue;
347  }
348  return value.Atof ();
349 }
350 
351 ////////////////////////////////////////////////////////////////////////////////
352 
353 template <>
354 TString fetchValue (const std::map<TString,TString>& keyValueMap,
355  TString key, TString defaultValue)
356 {
357  TString value (fetchValue (keyValueMap, key));
358  if (value == "") {
359  return defaultValue;
360  }
361  return value;
362 }
363 
364 ////////////////////////////////////////////////////////////////////////////////
365 
366 template <>
367 bool fetchValue (const std::map<TString,TString>& keyValueMap,
368  TString key, bool defaultValue)
369 {
370  TString value (fetchValue (keyValueMap, key));
371  if (value == "") {
372  return defaultValue;
373  }
374  value.ToUpper ();
375  if (value == "TRUE" || value == "T" || value == "1") {
376  return true;
377  }
378  return false;
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////////
382 
383 template <>
384 std::vector<double> fetchValue(const std::map<TString, TString> & keyValueMap,
385  TString key,
386  std::vector<double> defaultValue)
387 {
388  TString parseString (fetchValue (keyValueMap, key));
389  if (parseString == "") {
390  return defaultValue;
391  }
392  parseString.ToUpper ();
393  std::vector<double> values;
394 
395  const TString tokenDelim ("+");
396  TObjArray* tokenStrings = parseString.Tokenize (tokenDelim);
397  TIter nextToken (tokenStrings);
398  TObjString* tokenString = (TObjString*)nextToken ();
399  for (; tokenString != NULL; tokenString = (TObjString*)nextToken ()) {
400  std::stringstream sstr;
401  double currentValue;
402  sstr << tokenString->GetString ().Data ();
403  sstr >> currentValue;
404  values.push_back (currentValue);
405  }
406  return values;
407 }
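// Usage sketch (editor's illustration, not part of the original file): the
// specializations above return the supplied default when a key is missing,
// mirroring the calls made from ProcessOptions() below.
//
//    std::map<TString, TString> block{{"BATCHSIZE", "30"}};
//    int    batchSize = fetchValue(block, "BatchSize", 30);       // -> 30
//    double rate      = fetchValue(block, "LearningRate", 1e-5);  // -> 1e-5 (key absent)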
408 
409 ////////////////////////////////////////////////////////////////////////////////
410 
411 void TMVA::MethodDNN::ProcessOptions()
412 {
413  if (IgnoreEventsWithNegWeightsInTraining()) {
414  Log() << kINFO
415  << "Will ignore negative events in training!"
416  << Endl;
417  }
418 
419  if (fArchitectureString == "STANDARD") {
420  Log() << kERROR << "The STANDARD architecture has been deprecated. "
421  "Please use Architecture=CPU or Architecture=GPU. "
422  "See the TMVA Users' Guide for instructions if you "
423  "encounter problems."
424  << Endl;
425  Log() << kFATAL << "The STANDARD architecture has been deprecated. "
426  "Please use Architecture=CPU or Architecture=GPU. "
427  "See the TMVA Users' Guide for instructions if you "
428  "encounter problems."
429  << Endl;
430  }
431 
432  if (fArchitectureString == "OPENCL") {
433  Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
434  "Please use Architecture=CPU or Architecture=GPU for the "
435  "time being. See the TMVA Users' Guide for instructions "
436  "if you encounter problems."
437  << Endl;
438  Log() << kFATAL << "The OPENCL architecture has not been implemented yet. "
439  "Please use Architecture=CPU or Architecture=GPU for the "
440  "time being. See the TMVA Users' Guide for instructions "
441  "if you encounter problems."
442  << Endl;
443  }
444 
445  if (fArchitectureString == "GPU") {
446 #ifndef DNNCUDA // Included only if DNNCUDA flag is _not_ set.
447  Log() << kERROR << "CUDA backend not enabled. Please make sure "
448  "you have CUDA installed and it was successfully "
449  "detected by CMAKE."
450  << Endl;
451  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
452  "you have CUDA installed and it was successfully "
453  "detected by CMAKE."
454  << Endl;
455 #endif // DNNCUDA
456  }
457 
458  if (fArchitectureString == "CPU") {
459 #ifndef DNNCPU // Included only if DNNCPU flag is _not_ set.
460  Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
461  "you have a BLAS implementation and that it was successfully "
462  "detected by CMake, and that the imt CMake flag is set."
463  << Endl;
464  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
465  "you have a BLAS implementation and that it was successfully "
466  "detected by CMake, and that the imt CMake flag is set."
467  << Endl;
468 #endif // DNNCPU
469  }
470 
471  //
472  // Set network structure.
473  //
474 
475  fLayout = TMVA::MethodDNN::ParseLayoutString (fLayoutString);
476  size_t inputSize = GetNVariables ();
477  size_t outputSize = 1;
478  if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
479  outputSize = GetNTargets();
480  } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
481  outputSize = DataInfo().GetNClasses();
482  }
483 
484  fNet.SetBatchSize(1);
485  fNet.SetInputWidth(inputSize);
486 
487  auto itLayout = std::begin (fLayout);
488  auto itLayoutEnd = std::end (fLayout)-1;
489  for ( ; itLayout != itLayoutEnd; ++itLayout) {
490  fNet.AddLayer((*itLayout).first, (*itLayout).second);
491  }
492  fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
493 
494  //
495  // Loss function and output.
496  //
497 
498  fOutputFunction = EOutputFunction::kSigmoid;
499  if (fAnalysisType == Types::kClassification)
500  {
501  if (fErrorStrategy == "SUMOFSQUARES") {
502  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
503  }
504  if (fErrorStrategy == "CROSSENTROPY") {
505  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
506  }
507  fOutputFunction = EOutputFunction::kSigmoid;
508  } else if (fAnalysisType == Types::kRegression) {
509  if (fErrorStrategy != "SUMOFSQUARES") {
510  Log () << kWARNING << "For regression only SUMOFSQUARES is a valid "
511  << " neural net error function. Setting error function to "
512  << " SUMOFSQUARES now." << Endl;
513  }
514  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
515  fOutputFunction = EOutputFunction::kIdentity;
516  } else if (fAnalysisType == Types::kMulticlass) {
517  if (fErrorStrategy == "SUMOFSQUARES") {
518  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
519  }
520  if (fErrorStrategy == "CROSSENTROPY") {
521  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
522  }
523  if (fErrorStrategy == "MUTUALEXCLUSIVE") {
524  fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
525  }
526  fOutputFunction = EOutputFunction::kSoftmax;
527  }
528 
529  //
530  // Initialization
531  //
532 
533  if (fWeightInitializationString == "XAVIER") {
534  fWeightInitialization = DNN::EInitialization::kGauss;
535  }
536  else if (fWeightInitializationString == "XAVIERUNIFORM") {
537  fWeightInitialization = DNN::EInitialization::kUniform;
538  }
539  else {
540  fWeightInitialization = DNN::EInitialization::kGauss;
541  }
542 
543  //
544  // Training settings.
545  //
546 
547  KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
548  TString ("|"),
549  TString (","));
550  for (auto& block : strategyKeyValues) {
551  TTrainingSettings settings;
552 
553  settings.convergenceSteps = fetchValue(block, "ConvergenceSteps", 100);
554  settings.batchSize = fetchValue(block, "BatchSize", 30);
555  settings.testInterval = fetchValue(block, "TestRepetitions", 7);
556  settings.weightDecay = fetchValue(block, "WeightDecay", 0.0);
557  settings.learningRate = fetchValue(block, "LearningRate", 1e-5);
558  settings.momentum = fetchValue(block, "Momentum", 0.3);
559  settings.dropoutProbabilities = fetchValue(block, "DropConfig",
560  std::vector<Double_t>());
561 
562  TString regularization = fetchValue(block, "Regularization",
563  TString ("NONE"));
564  if (regularization == "L1") {
565  settings.regularization = DNN::ERegularization::kL1;
566  } else if (regularization == "L2") {
567  settings.regularization = DNN::ERegularization::kL2;
568  }
569 
570  TString strMultithreading = fetchValue(block, "Multithreading",
571  TString ("True"));
572  if (strMultithreading.BeginsWith ("T")) {
573  settings.multithreading = true;
574  } else {
575  settings.multithreading = false;
576  }
577 
578  fTrainingSettings.push_back(settings);
579  }
580 }
581 
582 ////////////////////////////////////////////////////////////////////////////////
583 
584 void TMVA::MethodDNN::Train()
585 {
586  if (fInteractive && fInteractive->NotInitialized()){
587  std::vector<TString> titles = {"Error on training set", "Error on test set"};
588  fInteractive->Init(titles);
589  // JsMVA progress bar maximum (100%)
590  fIPyMaxIter = 100;
591  }
592 
593  if (fArchitectureString == "GPU") {
594  TrainGpu();
595  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
596  ExitFromTraining();
597  return;
598  } else if (fArchitectureString == "OpenCL") {
599  Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
600  return;
601  } else if (fArchitectureString == "CPU") {
602  TrainCpu();
603  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
604  ExitFromTraining();
605  return;
606  }
607 
608  Log() << kINFO << "Using Standard Implementation.";
609 
610  std::vector<Pattern> trainPattern;
611  std::vector<Pattern> testPattern;
612 
613  const std::vector<TMVA::Event*>& eventCollectionTraining = GetEventCollection (Types::kTraining);
614  const std::vector<TMVA::Event*>& eventCollectionTesting = GetEventCollection (Types::kTesting);
615 
616  for (auto &event : eventCollectionTraining) {
617  const std::vector<Float_t>& values = event->GetValues();
618  if (fAnalysisType == Types::kClassification) {
619  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
620  trainPattern.push_back(Pattern (values.begin(),
621  values.end(),
622  outputValue,
623  event->GetWeight()));
624  trainPattern.back().addInput(1.0);
625  } else if (fAnalysisType == Types::kMulticlass) {
626  std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
627  oneHot[event->GetClass()] = 1.0;
628  trainPattern.push_back(Pattern (values.begin(), values.end(),
629  oneHot.cbegin(), oneHot.cend(),
630  event->GetWeight()));
631  trainPattern.back().addInput(1.0);
632  } else {
633  const std::vector<Float_t>& targets = event->GetTargets ();
634  trainPattern.push_back(Pattern(values.begin(),
635  values.end(),
636  targets.begin(),
637  targets.end(),
638  event->GetWeight ()));
639  trainPattern.back ().addInput (1.0); // bias node
640  }
641  }
642 
643  for (auto &event : eventCollectionTesting) {
644  const std::vector<Float_t>& values = event->GetValues();
645  if (fAnalysisType == Types::kClassification) {
646  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
647  testPattern.push_back(Pattern (values.begin(),
648  values.end(),
649  outputValue,
650  event->GetWeight()));
651  testPattern.back().addInput(1.0);
652  } else if (fAnalysisType == Types::kMulticlass) {
653  std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
654  oneHot[event->GetClass()] = 1.0;
655  testPattern.push_back(Pattern (values.begin(), values.end(),
656  oneHot.cbegin(), oneHot.cend(),
657  event->GetWeight()));
658  testPattern.back().addInput(1.0);
659  } else {
660  const std::vector<Float_t>& targets = event->GetTargets ();
661  testPattern.push_back(Pattern(values.begin(),
662  values.end(),
663  targets.begin(),
664  targets.end(),
665  event->GetWeight ()));
666  testPattern.back ().addInput (1.0); // bias node
667  }
668  }
669 
670  TMVA::DNN::Net net;
671  std::vector<double> weights;
672 
673  net.SetIpythonInteractive(fInteractive, &fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);
674 
675  net.setInputSize(fNet.GetInputWidth() + 1);
676  net.setOutputSize(fNet.GetOutputWidth() + 1);
677 
678  for (size_t i = 0; i < fNet.GetDepth(); i++) {
679  EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();
680  EnumFunction g = EnumFunction::LINEAR;
681  switch(f) {
682  case EActivationFunction::kIdentity: g = EnumFunction::LINEAR; break;
683  case EActivationFunction::kRelu: g = EnumFunction::RELU; break;
684  case EActivationFunction::kSigmoid: g = EnumFunction::SIGMOID; break;
685  case EActivationFunction::kTanh: g = EnumFunction::TANH; break;
686  case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
687  case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
688  case EActivationFunction::kGauss: g = EnumFunction::GAUSS; break;
689  }
690  if (i < fNet.GetDepth() - 1) {
691  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
692  } else {
693  ModeOutputValues h = ModeOutputValues::DIRECT;
694  switch(fOutputFunction) {
695  case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT; break;
696  case EOutputFunction::kSigmoid: h = ModeOutputValues::SIGMOID; break;
697  case EOutputFunction::kSoftmax: h = ModeOutputValues::SOFTMAX; break;
698  }
699  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
700  }
701  }
702 
703  switch(fNet.GetLossFunction()) {
704  case ELossFunction::kMeanSquaredError:
705  net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
706  break;
707  case ELossFunction::kCrossEntropy:
708  net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
709  break;
710  case ELossFunction::kSoftmaxCrossEntropy:
711  net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
712  break;
713  }
714 
715  switch(fWeightInitialization) {
716  case EInitialization::kGauss:
717  net.initializeWeights(WeightInitializationStrategy::XAVIER,
718  std::back_inserter(weights));
719  break;
720  case EInitialization::kUniform:
721  net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
722  std::back_inserter(weights));
723  break;
724  default:
725  net.initializeWeights(WeightInitializationStrategy::XAVIER,
726  std::back_inserter(weights));
727  break;
728  }
729 
730  int idxSetting = 0;
731  for (auto s : fTrainingSettings) {
732 
733  EnumRegularization r = EnumRegularization::NONE;
734  switch(s.regularization) {
735  case ERegularization::kNone: r = EnumRegularization::NONE; break;
736  case ERegularization::kL1: r = EnumRegularization::L1; break;
737  case ERegularization::kL2: r = EnumRegularization::L2; break;
738  }
739 
740  Settings * settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
741  s.testInterval, s.weightDecay, r,
742  MinimizerType::fSteepest, s.learningRate,
743  s.momentum, 1, s.multithreading);
744  std::shared_ptr<Settings> ptrSettings(settings);
745  ptrSettings->setMonitoring (0);
746  Log() << kINFO
747  << "Training with learning rate = " << ptrSettings->learningRate ()
748  << ", momentum = " << ptrSettings->momentum ()
749  << ", repetitions = " << ptrSettings->repetitions ()
750  << Endl;
751 
752  ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
753  (idxSetting+1)*100.0/(fSettings.size ()));
754 
755  const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
756  if (!dropConfig.empty ()) {
757  Log () << kINFO << "Drop configuration" << Endl
758  << " drop repetitions = " << ptrSettings->dropRepetitions()
759  << Endl;
760  }
761 
762  int idx = 0;
763  for (auto f : dropConfig) {
764  Log () << kINFO << " Layer " << idx << " = " << f << Endl;
765  ++idx;
766  }
767  Log () << kINFO << Endl;
768 
769  DNN::Steepest minimizer(ptrSettings->learningRate(),
770  ptrSettings->momentum(),
771  ptrSettings->repetitions());
772  net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
773  ptrSettings.reset();
774  Log () << kINFO << Endl;
775  idxSetting++;
776  }
777  size_t weightIndex = 0;
778  for (size_t l = 0; l < fNet.GetDepth(); l++) {
779  auto & layerWeights = fNet.GetLayer(l).GetWeights();
780  for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
781  for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
782  layerWeights(i,j) = weights[weightIndex];
783  weightIndex++;
784  }
785  }
786  auto & layerBiases = fNet.GetLayer(l).GetBiases();
787  if (l == 0) {
788  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
789  layerBiases(i,0) = weights[weightIndex];
790  weightIndex++;
791  }
792  } else {
793  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
794  layerBiases(i,0) = 0.0;
795  }
796  }
797  }
798  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
799  ExitFromTraining();
800 }
801 
802 ////////////////////////////////////////////////////////////////////////////////
803 
804 void TMVA::MethodDNN::TrainGpu()
805 {
806 
807 #ifdef DNNCUDA // Included only if DNNCUDA flag is set.
808 
809  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size();
810  size_t nTestSamples = GetEventCollection(Types::kTesting).size();
811 
812  Log() << kINFO << "Start of neural network training on GPU." << Endl;
813 
814  size_t trainingPhase = 1;
815  fNet.Initialize(fWeightInitialization);
816  for (TTrainingSettings & settings : fTrainingSettings) {
817 
818  if (fInteractive){
819  fInteractive->ClearGraphs();
820  }
821 
822  TNet<TCuda<>> net(settings.batchSize, fNet);
823  net.SetWeightDecay(settings.weightDecay);
824  net.SetRegularization(settings.regularization);
825 
826  // Need to convert dropout probabilities to the conventions used
827  // by the backend implementation.
828  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
829  for (auto & p : dropoutVector) {
830  p = 1.0 - p;
831  }
832  net.SetDropoutProbabilities(dropoutVector);
833 
834  net.InitializeGradients();
835  auto testNet = net.CreateClone(settings.batchSize);
836 
837  Log() << kINFO << "Training phase " << trainingPhase << " of "
838  << fTrainingSettings.size() << ":" << Endl;
839  trainingPhase++;
840 
841  using DataLoader_t = TDataLoader<TMVAInput_t, TCuda<>>;
842 
843  size_t nThreads = 1;
844  DataLoader_t trainingData(GetEventCollection(Types::kTraining),
845  nTrainingSamples,
846  net.GetBatchSize(),
847  net.GetInputWidth(),
848  net.GetOutputWidth(), nThreads);
849  DataLoader_t testData(GetEventCollection(Types::kTesting),
850  nTestSamples,
851  testNet.GetBatchSize(),
852  net.GetInputWidth(),
853  net.GetOutputWidth(), nThreads);
854  DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
855  settings.convergenceSteps,
856  settings.testInterval);
857 
858  std::vector<TNet<TCuda<>>> nets{};
859  std::vector<TBatch<TCuda<>>> batches{};
860  nets.reserve(nThreads);
861  for (size_t i = 0; i < nThreads; i++) {
862  nets.push_back(net);
863  for (size_t j = 0; j < net.GetDepth(); j++)
864  {
865  auto &masterLayer = net.GetLayer(j);
866  auto &layer = nets.back().GetLayer(j);
867  TCuda<>::Copy(layer.GetWeights(),
868  masterLayer.GetWeights());
869  TCuda<>::Copy(layer.GetBiases(),
870  masterLayer.GetBiases());
871  }
872  }
873 
874  bool converged = false;
875  size_t stepCount = 0;
876  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
877 
878  std::chrono::time_point<std::chrono::system_clock> start, end;
879  start = std::chrono::system_clock::now();
880 
881  if (!fInteractive) {
882  Log() << std::setw(10) << "Epoch" << " | "
883  << std::setw(12) << "Train Err."
884  << std::setw(12) << "Test Err."
885  << std::setw(12) << "GFLOP/s"
886  << std::setw(12) << "Conv. Steps" << Endl;
887  std::string separator(62, '-');
888  Log() << separator << Endl;
889  }
890 
891  while (!converged)
892  {
893  stepCount++;
894 
895  // Perform minimization steps for a full epoch.
896  trainingData.Shuffle();
897  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
898  batches.clear();
899  for (size_t j = 0; j < nThreads; j++) {
900  batches.reserve(nThreads);
901  batches.push_back(trainingData.GetBatch());
902  }
903  if (settings.momentum > 0.0) {
904  minimizer.StepMomentum(net, nets, batches, settings.momentum);
905  } else {
906  minimizer.Step(net, nets, batches);
907  }
908  }
909 
910  if ((stepCount % minimizer.GetTestInterval()) == 0) {
911 
912  // Compute test error.
913  Double_t testError = 0.0;
914  for (auto batch : testData) {
915  auto inputMatrix = batch.GetInput();
916  auto outputMatrix = batch.GetOutput();
917  testError += testNet.Loss(inputMatrix, outputMatrix);
918  }
919  testError /= (Double_t) (nTestSamples / settings.batchSize);
920 
921  end = std::chrono::system_clock::now();
922 
923  // Compute training error.
924  Double_t trainingError = 0.0;
925  for (auto batch : trainingData) {
926  auto inputMatrix = batch.GetInput();
927  auto outputMatrix = batch.GetOutput();
928  trainingError += net.Loss(inputMatrix, outputMatrix);
929  }
930  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
931 
932  // Compute numerical throughput.
933  std::chrono::duration<double> elapsed_seconds = end - start;
934  double seconds = elapsed_seconds.count();
935  double nFlops = (double) (settings.testInterval * batchesInEpoch);
936  nFlops *= net.GetNFlops() * 1e-9;
937 
938  converged = minimizer.HasConverged(testError);
939  start = std::chrono::system_clock::now();
940 
941  if (fInteractive) {
942  fInteractive->AddPoint(stepCount, trainingError, testError);
943  fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
944  / minimizer.GetConvergenceSteps ();
945  if (fExitFromTraining) break;
946  } else {
947  Log() << std::setw(10) << stepCount << " | "
948  << std::setw(12) << trainingError
949  << std::setw(12) << testError
950  << std::setw(12) << nFlops / seconds
951  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
952  if (converged) {
953  Log() << Endl;
954  }
955  }
956  }
957  }
958  for (size_t l = 0; l < net.GetDepth(); l++) {
959  fNet.GetLayer(l).GetWeights() = (TMatrixT<Double_t>) net.GetLayer(l).GetWeights();
960  fNet.GetLayer(l).GetBiases() = (TMatrixT<Double_t>) net.GetLayer(l).GetBiases();
961  }
962  }
963 
964 #else // DNNCUDA flag not set.
965 
966  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
967  "you have CUDA installed and it was successfully "
968  "detected by CMAKE." << Endl;
969 #endif // DNNCUDA
970 }
971 
972 ////////////////////////////////////////////////////////////////////////////////
973 
974 void TMVA::MethodDNN::TrainCpu()
975 {
976 
977 #ifdef DNNCPU // Included only if DNNCPU flag is set.
978 
979  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size();
980  size_t nTestSamples = GetEventCollection(Types::kTesting).size();
981 
982  Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;
983 
984  fNet.Initialize(fWeightInitialization);
985 
986  size_t trainingPhase = 1;
987  for (TTrainingSettings & settings : fTrainingSettings) {
988 
989  if (fInteractive){
990  fInteractive->ClearGraphs();
991  }
992 
993  Log() << "Training phase " << trainingPhase << " of "
994  << fTrainingSettings.size() << ":" << Endl;
995  trainingPhase++;
996 
997  TNet<TCpu<>> net(settings.batchSize, fNet);
998  net.SetWeightDecay(settings.weightDecay);
999  net.SetRegularization(settings.regularization);
1000  // Need to convert dropout probabilities to the conventions used
1001  // by the backend implementation.
1002  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1003  for (auto & p : dropoutVector) {
1004  p = 1.0 - p;
1005  }
1006  net.SetDropoutProbabilities(dropoutVector);
1007  //net.SetDropoutProbabilities(settings.dropoutProbabilities);
1008  net.InitializeGradients();
1009  auto testNet = net.CreateClone(settings.batchSize);
1010 
1011  using DataLoader_t = TDataLoader<TMVAInput_t, TCpu<>>;
1012 
1013  size_t nThreads = 1;
1014  DataLoader_t trainingData(GetEventCollection(Types::kTraining),
1015  nTrainingSamples,
1016  net.GetBatchSize(),
1017  net.GetInputWidth(),
1018  net.GetOutputWidth(), nThreads);
1019  DataLoader_t testData(GetEventCollection(Types::kTesting),
1020  nTestSamples,
1021  testNet.GetBatchSize(),
1022  net.GetInputWidth(),
1023  net.GetOutputWidth(), nThreads);
1024  DNN::TGradientDescent<TCpu<>> minimizer(settings.learningRate,
1025  settings.convergenceSteps,
1026  settings.testInterval);
1027 
1028  std::vector<TNet<TCpu<>>> nets{};
1029  std::vector<TBatch<TCpu<>>> batches{};
1030  nets.reserve(nThreads);
1031  for (size_t i = 0; i < nThreads; i++) {
1032  nets.push_back(net);
1033  for (size_t j = 0; j < net.GetDepth(); j++)
1034  {
1035  auto &masterLayer = net.GetLayer(j);
1036  auto &layer = nets.back().GetLayer(j);
1037  TCpu<>::Copy(layer.GetWeights(),
1038  masterLayer.GetWeights());
1039  TCpu<>::Copy(layer.GetBiases(),
1040  masterLayer.GetBiases());
1041  }
1042  }
1043 
1044  bool converged = false;
1045  size_t stepCount = 0;
1046  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
1047 
1048  std::chrono::time_point<std::chrono::system_clock> start, end;
1049  start = std::chrono::system_clock::now();
1050 
1051  if (!fInteractive) {
1052  Log() << std::setw(10) << "Epoch" << " | "
1053  << std::setw(12) << "Train Err."
1054  << std::setw(12) << "Test Err."
1055  << std::setw(12) << "GFLOP/s"
1056  << std::setw(12) << "Conv. Steps" << Endl;
1057  std::string separator(62, '-');
1058  Log() << separator << Endl;
1059  }
1060 
1061  while (!converged)
1062  {
1063  stepCount++;
1064  // Perform minimization steps for a full epoch.
1065  trainingData.Shuffle();
1066  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1067  batches.clear();
1068  for (size_t j = 0; j < nThreads; j++) {
1069  batches.reserve(nThreads);
1070  batches.push_back(trainingData.GetBatch());
1071  }
1072  if (settings.momentum > 0.0) {
1073  minimizer.StepMomentum(net, nets, batches, settings.momentum);
1074  } else {
1075  minimizer.Step(net, nets, batches);
1076  }
1077  }
1078 
1079  if ((stepCount % minimizer.GetTestInterval()) == 0) {
1080 
1081  // Compute test error.
1082  Double_t testError = 0.0;
1083  for (auto batch : testData) {
1084  auto inputMatrix = batch.GetInput();
1085  auto outputMatrix = batch.GetOutput();
1086  testError += testNet.Loss(inputMatrix, outputMatrix);
1087  }
1088  testError /= (Double_t) (nTestSamples / settings.batchSize);
1089 
1090  end = std::chrono::system_clock::now();
1091 
1092  // Compute training error.
1093  Double_t trainingError = 0.0;
1094  for (auto batch : trainingData) {
1095  auto inputMatrix = batch.GetInput();
1096  auto outputMatrix = batch.GetOutput();
1097  trainingError += net.Loss(inputMatrix, outputMatrix);
1098  }
1099  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1100 
1101  if (fInteractive){
1102  fInteractive->AddPoint(stepCount, trainingError, testError);
1103  fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
1104  if (fExitFromTraining) break;
1105  }
1106 
1107  // Compute numerical throughput.
1108  std::chrono::duration<double> elapsed_seconds = end - start;
1109  double seconds = elapsed_seconds.count();
1110  double nFlops = (double) (settings.testInterval * batchesInEpoch);
1111  nFlops *= net.GetNFlops() * 1e-9;
1112 
1113  converged = minimizer.HasConverged(testError);
1114  start = std::chrono::system_clock::now();
1115 
1116  if (fInteractive) {
1117  fInteractive->AddPoint(stepCount, trainingError, testError);
1118  fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1119  / minimizer.GetConvergenceSteps ();
1120  if (fExitFromTraining) break;
1121  } else {
1122  Log() << std::setw(10) << stepCount << " | "
1123  << std::setw(12) << trainingError
1124  << std::setw(12) << testError
1125  << std::setw(12) << nFlops / seconds
1126  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1127  if (converged) {
1128  Log() << Endl;
1129  }
1130  }
1131  }
1132  }
1133 
1134 
1135  for (size_t l = 0; l < net.GetDepth(); l++) {
1136  auto & layer = fNet.GetLayer(l);
1137  layer.GetWeights() = (TMatrixT<Double_t>) net.GetLayer(l).GetWeights();
1138  layer.GetBiases() = (TMatrixT<Double_t>) net.GetLayer(l).GetBiases();
1139  }
1140  }
1141 
1142 #else // DNNCPU flag not set.
1143  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1144  "you have a BLAS implementation and that it was successfully "
1145  "detected by CMake, and that the imt CMake flag is set." << Endl;
1146 #endif // DNNCPU
1147 }
1148 
1149 ////////////////////////////////////////////////////////////////////////////////
1150 
1151 Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUpper*/ )
1152 {
1153  size_t nVariables = GetEvent()->GetNVariables();
1154  Matrix_t X(1, nVariables);
1155  Matrix_t YHat(1, 1);
1156 
1157  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1158  for (size_t i = 0; i < nVariables; i++) {
1159  X(0,i) = inputValues[i];
1160  }
1161 
1162  fNet.Prediction(YHat, X, fOutputFunction);
1163  return YHat(0,0);
1164 }
1165 
1166 ////////////////////////////////////////////////////////////////////////////////
1167 
1168 const std::vector<Float_t> & TMVA::MethodDNN::GetRegressionValues()
1169 {
1170  size_t nVariables = GetEvent()->GetNVariables();
1171  Matrix_t X(1, nVariables);
1172 
1173  const Event *ev = GetEvent();
1174  const std::vector<Float_t>& inputValues = ev->GetValues();
1175  for (size_t i = 0; i < nVariables; i++) {
1176  X(0,i) = inputValues[i];
1177  }
1178 
1179  size_t nTargets = std::max(1u, ev->GetNTargets());
1180  Matrix_t YHat(1, nTargets);
1181  std::vector<Float_t> output(nTargets);
1182  auto net = fNet.CreateClone(1);
1183  net.Prediction(YHat, X, fOutputFunction);
1184 
1185  for (size_t i = 0; i < nTargets; i++)
1186  output[i] = YHat(0, i);
1187 
1188  if (fRegressionReturnVal == NULL) {
1189  fRegressionReturnVal = new std::vector<Float_t>();
1190  }
1191  fRegressionReturnVal->clear();
1192 
1193  Event * evT = new Event(*ev);
1194  for (size_t i = 0; i < nTargets; ++i) {
1195  evT->SetTarget(i, output[i]);
1196  }
1197 
1198  const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1199  for (size_t i = 0; i < nTargets; ++i) {
1200  fRegressionReturnVal->push_back(evT2->GetTarget(i));
1201  }
1202  delete evT;
1203  return *fRegressionReturnVal;
1204 }
1205 
1206 const std::vector<Float_t> & TMVA::MethodDNN::GetMulticlassValues()
1207 {
1208  size_t nVariables = GetEvent()->GetNVariables();
1209  Matrix_t X(1, nVariables);
1210  Matrix_t YHat(1, DataInfo().GetNClasses());
1211  if (fMulticlassReturnVal == NULL) {
1212  fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1213  }
1214 
1215  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1216  for (size_t i = 0; i < nVariables; i++) {
1217  X(0,i) = inputValues[i];
1218  }
1219 
1220  fNet.Prediction(YHat, X, fOutputFunction);
1221  for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1222  (*fMulticlassReturnVal)[i] = YHat(0, i);
1223  }
1224  return *fMulticlassReturnVal;
1225 }
1226 
1227 ////////////////////////////////////////////////////////////////////////////////
1228 
1229 void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const
1230 {
1231  void* nn = gTools().xmlengine().NewChild(parent, 0, "Weights");
1232  Int_t inputWidth = fNet.GetInputWidth();
1233  Int_t depth = fNet.GetDepth();
1234  char lossFunction = static_cast<char>(fNet.GetLossFunction());
1235  gTools().xmlengine().NewAttr(nn, 0, "InputWidth",
1236  gTools().StringFromInt(inputWidth));
1237  gTools().xmlengine().NewAttr(nn, 0, "Depth", gTools().StringFromInt(depth));
1238  gTools().xmlengine().NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1239  gTools().xmlengine().NewAttr(nn, 0, "OutputFunction",
1240  TString(static_cast<char>(fOutputFunction)));
1241 
1242  for (Int_t i = 0; i < depth; i++) {
1243  const auto& layer = fNet.GetLayer(i);
1244  auto layerxml = gTools().xmlengine().NewChild(nn, 0, "Layer");
1245  int activationFunction = static_cast<int>(layer.GetActivationFunction());
1246  gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
1247  TString::Itoa(activationFunction, 10));
1248  WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
1249  WriteMatrixXML(layerxml, "Biases", layer.GetBiases());
1250  }
1251 }
1252 
1253 ////////////////////////////////////////////////////////////////////////////////
1254 
1255 void TMVA::MethodDNN::ReadWeightsFromXML(void* rootXML)
1256 {
1257  auto netXML = gTools().GetChild(rootXML, "Weights");
1258  if (!netXML){
1259  netXML = rootXML;
1260  }
1261 
1262  fNet.Clear();
1263  fNet.SetBatchSize(1);
1264 
1265  size_t inputWidth, depth;
1266  gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1267  gTools().ReadAttr(netXML, "Depth", depth);
1268  char lossFunctionChar;
1269  gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1270  char outputFunctionChar;
1271  gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1272 
1273  fNet.SetInputWidth(inputWidth);
1274  fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
1275  fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1276 
1277  size_t previousWidth = inputWidth;
1278  auto layerXML = gTools().xmlengine().GetChild(netXML, "Layer");
1279  for (size_t i = 0; i < depth; i++) {
1280  TString fString;
1281  EActivationFunction f;
1282 
1283  // Read activation function.
1284  gTools().ReadAttr(layerXML, "ActivationFunction", fString);
1285  f = static_cast<EActivationFunction>(fString.Atoi());
1286 
1287  // Read number of neurons.
1288  size_t width;
1289  auto matrixXML = gTools().GetChild(layerXML, "Weights");
1290  gTools().ReadAttr(matrixXML, "rows", width);
1291 
1292  fNet.AddLayer(width, f);
1293  TMatrixT<Double_t> weights(width, previousWidth);
1294  TMatrixT<Double_t> biases(width, 1);
1295  ReadMatrixXML(layerXML, "Weights", weights);
1296  ReadMatrixXML(layerXML, "Biases", biases);
1297  fNet.GetLayer(i).GetWeights() = weights;
1298  fNet.GetLayer(i).GetBiases() = biases;
1299 
1300  layerXML = gTools().GetNextChild(layerXML);
1301  previousWidth = width;
1302  }
1303 }
1304 
1305 ////////////////////////////////////////////////////////////////////////////////
1306 
1307 void TMVA::MethodDNN::ReadWeightsFromStream( std::istream & /*istr*/)
1308 {
1309 }
1310 
1311 ////////////////////////////////////////////////////////////////////////////////
1312 
1313 const TMVA::Ranking* TMVA::MethodDNN::CreateRanking()
1314 {
1315  fRanking = new Ranking( GetName(), "Importance" );
1316  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1317  fRanking->AddRank( Rank( GetInputLabel(ivar), 1.0));
1318  }
1319  return fRanking;
1320 }
1321 
1322 ////////////////////////////////////////////////////////////////////////////////
1323 
1324 void TMVA::MethodDNN::MakeClassSpecific( std::ostream& /*fout*/,
1325  const TString& /*className*/ ) const
1326 {
1327 }
1328 
1329 ////////////////////////////////////////////////////////////////////////////////
1330 
1331 void TMVA::MethodDNN::GetHelpMessage() const
1332 {
1333  // get help message text
1334  //
1335  // typical length of text line:
1336  // "|--------------------------------------------------------------|"
1337  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1338  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1339 
1340  Log() << Endl;
1341  Log() << col << "--- Short description:" << colres << Endl;
1342  Log() << Endl;
1343  Log() << "The deep neural network (DNN) is a feedforward" << Endl;
1344  Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
1345  Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
1346  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1347  Log() << "signal and background, regression targets or multiclass classes). " << Endl;
1348  Log() << Endl;
1349  Log() << col << "--- Performance optimisation:" << colres << Endl;
1350  Log() << Endl;
1351 
1352  const char* txt = "The DNN supports various options to improve performance in terms of training speed and \n \
1353 reduction of overfitting: \n \
1354 \n \
1355  - different training settings can be stacked, such that the initial training \n\
1356  is done with a large learning rate and a large drop-out fraction, while \n \
1357  at a later stage the learning rate and the drop-out fraction can be reduced. \n \
1358  - drop out \n \
1359  [recommended: \n \
1360  initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
1361  later training stage: 0.1 or 0.0 for all layers \n \
1362  final training stage: 0.0] \n \
1363  Drop out is a technique where at each training cycle a fraction of arbitrary \n \
1364  nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
1365  - L1 and L2 regularization are available \n \
1366  - Minibatches \n \
1367  [recommended 10 - 150] \n \
1368  Arbitrary mini-batch sizes can be chosen. \n \
1369  - Multithreading \n \
1370  [recommended: True] \n \
1371  Multithreading can be turned on. The minibatches are distributed to the available \n \
1372  cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
1373  \n \
1374  Options: \n \
1375  \"Layout\": \n \
1376  - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
1377  - meaning: \n \
1378  . two hidden layers (separated by \",\") \n \
1379  . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
1380  . the activation function for the output layer is LINEAR \n \
1381  . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
1382  . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
1383  . the number of nodes in the output layer is determined by the number of output nodes \n \
1384  and can therefore not be chosen freely. \n \
1385  \n \
1386  \"ErrorStrategy\": \n \
1387  - SUMOFSQUARES \n \
1388  The error of the neural net is determined by a sum-of-squares error function \n \
1389  For regression, this is the only possible choice. \n \
1390  - CROSSENTROPY \n \
1391  The error of the neural net is determined by a cross entropy function. The \n \
1392  output values are automatically (internally) transformed into probabilities \n \
1393  using a sigmoid function. \n \
1394  For signal/background classification this is the default choice. \n \
1395  For multiclass using cross entropy more than one or no output classes \n \
1396  can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n \
1397  A and C are true, Event 2: C is true, ...) \n \
1398  - MUTUALEXCLUSIVE \n \
1399  In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
1400  \n \
1401  \"WeightInitialization\" \n \
1402  - XAVIER \n \
1403  [recommended] \n \
1404  \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
1405  such that the variance of the values of the nodes is preserved for each layer. \n \
1406  - XAVIERUNIFORM \n \
1407  The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
1408  - LAYERSIZE \n \
1409  Random values scaled by the layer size \n \
1410  \n \
1411  \"TrainingStrategy\" \n \
1412  - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1413  - explanation: two stacked training settings separated by \"|\" \n \
1414  . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1415  . second training setting : \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1416  . LearningRate : \n \
1417  - recommended for classification: 0.1 initially, 1e-4 later \n \
1418  - recommended for regression: 1e-4 and less \n \
1419  . Momentum : \n \
1420  preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
1421  . Repetitions : \n \
1422  train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
1423  . ConvergenceSteps : \n \
1424  Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n \
1425  of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n \
1426  cycle the test samples are evaluated and thus the convergence is checked) \n \
1427  . BatchSize \n \
1428  Size of the mini-batches. \n \
1429  . TestRepetitions \n \
1430  Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n \
1431  . WeightDecay \n \
1432  If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
1433  . Renormalize \n \
1434  NONE, L1 (|w|) or L2 (w^2) \n \
1435  . DropConfig \n \
1436  Drop a fraction of arbitrary nodes of each of the layers according to the values given \n \
1437  in the DropConfig. \n \
1438  [example: DropConfig=0.0+0.5+0.3 \n \
1439  meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
1440  in layer 2 \n \
1441  recommended: leave all the nodes turned on for the input layer (layer 0) \n \
1442  turn off half of the nodes in later layers for the initial training; leave all nodes \n \
1443  turned on (0.0) in later training stages] \n \
1444  . DropRepetitions \n \
1445  Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n \
1446  [recommended : 1] \n \
1447  . Multithreading \n \
1448  turn on multithreading [recommended: True] \n \
1449  \n";
1450  Log () << txt << Endl;
1451 }
1452 
1453 } // namespace TMVA