MethodDNN.cxx
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDNN *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A neural network implementation *
12  * *
13  * Authors (alphabetical): *
14  * Simon Pfreundschuh <s.pfreundschuh@gmail.com> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@gmx.ch> - CERN, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 //______________________________________________________________________________
29 //
30 // Deep Neural Network Implementation
31 //______________________________________________________________________________
32 
33 #include "TString.h"
34 #include "TTree.h"
35 #include "TFile.h"
36 #include "TFormula.h"
37 
38 #include "TMVA/ClassifierFactory.h"
39 #include "TMVA/Configurable.h"
40 #include "TMVA/IMethod.h"
41 #include "TMVA/MsgLogger.h"
42 #include "TMVA/MethodBase.h"
43 #include "TMVA/MethodDNN.h"
44 #include "TMVA/Timer.h"
45 #include "TMVA/Types.h"
46 #include "TMVA/Tools.h"
47 #include "TMVA/Config.h"
48 #include "TMVA/Ranking.h"
49 
50 #include "TMVA/DNN/Net.h"
52 
53 #include "TMVA/NeuralNet.h"
54 #include "TMVA/Monitoring.h"
55 
56 #include <algorithm>
57 #include <iostream>
58 #include <string>
59 #include <iomanip>
60 
61 REGISTER_METHOD(DNN)
62 
63 ClassImp(TMVA::MethodDNN)
64 
65 using TMVA::DNN::EActivationFunction;
66 using TMVA::DNN::ELossFunction;
67 using TMVA::DNN::EInitialization;
68 using TMVA::DNN::EOutputFunction;
69 
70 namespace TMVA
71 {
72 
73 //______________________________________________________________________________
74 TMVA::MethodDNN::MethodDNN(const TString& jobName,
75  const TString& methodTitle,
76  DataSetInfo& theData,
77  const TString& theOption)
78  : MethodBase( jobName, Types::kDNN, methodTitle, theData, theOption),
79  fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
80  fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
81  fTrainingSettings(), fResume(false), fSettings()
82 {
83  // standard constructor
84 }
85 
86 //______________________________________________________________________________
87 TMVA::MethodDNN::MethodDNN(DataSetInfo& theData,
88  const TString& theWeightFile)
89  : MethodBase( Types::kDNN, theData, theWeightFile),
90  fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
91  fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
92  fTrainingSettings(), fResume(false), fSettings()
93 {
94  // constructor from a weight file
95 }
96 
97 //______________________________________________________________________________
98 TMVA::MethodDNN::~MethodDNN()
99 {
100  // destructor
101  // nothing to be done
102 }
103 
104 //______________________________________________________________________________
105 Bool_t TMVA::MethodDNN::HasAnalysisType(Types::EAnalysisType type,
106  UInt_t numberClasses,
107  UInt_t /*numberTargets*/ )
108 {
109  // The DNN can handle two-class classification, multiclass classification,
110  // and regression
111  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
112  if (type == Types::kMulticlass ) return kTRUE;
113  if (type == Types::kRegression ) return kTRUE;
114 
115  return kFALSE;
116 }
117 
118 //______________________________________________________________________________
119 void TMVA::MethodDNN::Init()
120 {
121  // default initializations
122 }
123 
124 //______________________________________________________________________________
125 void TMVA::MethodDNN::DeclareOptions()
126 {
127  // Options to be set in the option string:
128  //
129  // LearningRate <float> DNN learning rate parameter.
130  // DecayRate <float> Decay rate for learning parameter.
131  // TestRate <int> Period of validation set error computation.
132  // BatchSize <int> Number of events per batch.
133 
134  DeclareOptionRef(fLayoutString="SOFTSIGN|(N+100)*2,LINEAR",
135  "Layout",
136  "Layout of the network.");
137 
138  DeclareOptionRef(fErrorStrategy="CROSSENTROPY",
139  "ErrorStrategy",
140  "Loss function: Mean squared error (regression)"
141  " or cross entropy (binary classification).");
142  AddPreDefVal(TString("CROSSENTROPY"));
143  AddPreDefVal(TString("SUMOFSQUARES"));
144 
145  DeclareOptionRef(fWeightInitializationString="XAVIER",
146  "WeightInitialization",
147  "Weight initialization strategy");
148  AddPreDefVal(TString("XAVIER"));
149  AddPreDefVal(TString("XAVIERUNIFORM"));
150 
151  DeclareOptionRef(fArchitectureString="CPU",
152  "Architecture",
153  "Which architecture to perform the training on.");
154  AddPreDefVal(TString("STANDARD"));
155  AddPreDefVal(TString("CPU"));
156  AddPreDefVal(TString("GPU"));
157  AddPreDefVal(TString("OPENCL"));
158 
159  DeclareOptionRef(
160  fTrainingStrategyString = "LearningRate=1e-1,"
161  "Momentum=0.3,"
162  "Repetitions=3,"
163  "ConvergenceSteps=50,"
164  "BatchSize=30,"
165  "TestRepetitions=7,"
166  "WeightDecay=0.0,"
167  "Renormalize=L2,"
168  "DropConfig=0.0,"
169  "DropRepetitions=5|LearningRate=1e-4,"
170  "Momentum=0.3,"
171  "Repetitions=3,"
172  "ConvergenceSteps=50,"
173  "BatchSize=20,"
174  "TestRepetitions=7,"
175  "WeightDecay=0.001,"
176  "Renormalize=L2,"
177  "DropConfig=0.0+0.5+0.5,"
178  "DropRepetitions=5,"
179  "Multithreading=True",
180  "TrainingStrategy",
181  "Defines the training strategies.");
182 }
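// Illustrative usage sketch (editorial addition, not part of the original
// source): the options declared above are normally passed as a single option
// string when booking the method through the TMVA Factory. The variable name
// "factory" and the concrete option values are placeholders, and the exact
// BookMethod() signature depends on the ROOT version in use.
//
//    factory->BookMethod(TMVA::Types::kDNN, "DNN",
//       "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR:"
//       "ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:"
//       "Architecture=STANDARD:"
//       "TrainingStrategy=LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,"
//       "BatchSize=30,TestRepetitions=7"
//       "|LearningRate=1e-3,Momentum=0.1,ConvergenceSteps=50,"
//       "BatchSize=30,TestRepetitions=7");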
183 
184 //______________________________________________________________________________
185 auto TMVA::MethodDNN::ParseLayoutString(TString layoutString)
186  -> LayoutVector_t
187 {
188  // parse layout specification string and return a vector, each entry
189  // containing the number of neurons to go in each successive layer
190  LayoutVector_t layout;
191  const TString layerDelimiter(",");
192  const TString subDelimiter("|");
193 
194  const size_t inputSize = GetNvar();
195 
196  TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
197  TIter nextLayer (layerStrings);
198  TObjString* layerString = (TObjString*)nextLayer ();
199 
200  for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
201  int numNodes = 0;
202  EActivationFunction activationFunction = EActivationFunction::kTanh;
203 
204  TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
205  TIter nextToken (subStrings);
206  TObjString* token = (TObjString *) nextToken();
207  int idxToken = 0;
208  for (; token != nullptr; token = (TObjString *) nextToken()) {
209  switch (idxToken)
210  {
211  case 0:
212  {
213  TString strActFnc (token->GetString ());
214  if (strActFnc == "RELU") {
215  activationFunction = DNN::EActivationFunction::kRelu;
216  } else if (strActFnc == "TANH") {
217  activationFunction = DNN::EActivationFunction::kTanh;
218  } else if (strActFnc == "SYMMRELU") {
219  activationFunction = DNN::EActivationFunction::kSymmRelu;
220  } else if (strActFnc == "SOFTSIGN") {
221  activationFunction = DNN::EActivationFunction::kSoftSign;
222  } else if (strActFnc == "SIGMOID") {
223  activationFunction = DNN::EActivationFunction::kSigmoid;
224  } else if (strActFnc == "LINEAR") {
225  activationFunction = DNN::EActivationFunction::kIdentity;
226  } else if (strActFnc == "GAUSS") {
227  activationFunction = DNN::EActivationFunction::kGauss;
228  }
229  }
230  break;
231  case 1: // number of nodes
232  {
233  TString strNumNodes (token->GetString ());
234  TString strN ("x");
235  strNumNodes.ReplaceAll ("N", strN);
236  strNumNodes.ReplaceAll ("n", strN);
237  TFormula fml ("tmp",strNumNodes);
238  numNodes = fml.Eval (inputSize);
239  }
240  break;
241  }
242  ++idxToken;
243  }
244  layout.push_back(std::make_pair(numNodes, activationFunction));
245  }
246  return layout;
247 }
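// Worked example (editorial addition, not part of the original source):
// assuming a dataset with GetNvar() == 10 input variables, the layout string
//
//    "TANH|N+5,TANH|20,LINEAR"
//
// is parsed into the vector { {15, kTanh}, {20, kTanh}, {0, kIdentity} }.
// The token "N" (or "n") is replaced by the number of input variables before
// the node-count expression is evaluated with TFormula; the width of the last
// entry is irrelevant here, since ProcessOptions() replaces it with an output
// layer whose width is fixed by the number of targets or classes.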
248 
249 // parse key value pairs in blocks -> return vector of blocks with map of key value pairs
250 //______________________________________________________________________________
251 auto TMVA::MethodDNN::ParseKeyValueString(TString parseString,
252  TString blockDelim,
253  TString tokenDelim)
254  -> KeyValueVector_t
255 {
256  KeyValueVector_t blockKeyValues;
257  const TString keyValueDelim ("=");
258 
259  TObjArray* blockStrings = parseString.Tokenize (blockDelim);
260  TIter nextBlock (blockStrings);
261  TObjString* blockString = (TObjString *) nextBlock();
262 
263  for (; blockString != nullptr; blockString = (TObjString *) nextBlock())
264  {
265  blockKeyValues.push_back (std::map<TString,TString>());
266  std::map<TString,TString>& currentBlock = blockKeyValues.back ();
267 
268  TObjArray* subStrings = blockString->GetString ().Tokenize (tokenDelim);
269  TIter nextToken (subStrings);
270  TObjString* token = (TObjString*)nextToken ();
271 
272  for (; token != nullptr; token = (TObjString *)nextToken())
273  {
274  TString strKeyValue (token->GetString ());
275  int delimPos = strKeyValue.First (keyValueDelim.Data ());
276  if (delimPos <= 0)
277  continue;
278 
279  TString strKey = TString (strKeyValue (0, delimPos));
280  strKey.ToUpper();
281  TString strValue = TString (strKeyValue (delimPos+1, strKeyValue.Length ()));
282 
283  strKey = strKey.Strip (TString::kBoth, ' ');
284  strValue = strValue.Strip (TString::kBoth, ' ');
285 
286  currentBlock.insert (std::make_pair (strKey, strValue));
287  }
288  }
289  return blockKeyValues;
290 }
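// Worked example (editorial addition, not part of the original source): with
// blockDelim = "|" and tokenDelim = ",", the string
//
//    "LearningRate=1e-1,BatchSize=30|LearningRate=1e-4,BatchSize=20"
//
// yields a vector of two maps,
//    { {"LEARNINGRATE","1e-1"}, {"BATCHSIZE","30"} } and
//    { {"LEARNINGRATE","1e-4"}, {"BATCHSIZE","20"} },
// i.e. one map per training block, with keys converted to upper case and
// values kept verbatim.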
291 
292 //______________________________________________________________________________
293 TString fetchValue (const std::map<TString, TString>& keyValueMap, TString key)
294 {
295  key.ToUpper ();
296  std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
297  if (it == keyValueMap.end()) {
298  return TString ("");
299  }
300  return it->second;
301 }
302 
303 //______________________________________________________________________________
304 template <typename T>
305 T fetchValue(const std::map<TString,TString>& keyValueMap,
306  TString key,
307  T defaultValue);
308 
309 //______________________________________________________________________________
310 template <>
311 int fetchValue(const std::map<TString,TString>& keyValueMap,
312  TString key,
313  int defaultValue)
314 {
315  TString value (fetchValue (keyValueMap, key));
316  if (value == "") {
317  return defaultValue;
318  }
319  return value.Atoi ();
320 }
321 
322 //______________________________________________________________________________
323 template <>
324 double fetchValue (const std::map<TString,TString>& keyValueMap,
325  TString key, double defaultValue)
326 {
327  TString value (fetchValue (keyValueMap, key));
328  if (value == "") {
329  return defaultValue;
330  }
331  return value.Atof ();
332 }
333 
334 //______________________________________________________________________________
335 template <>
336 TString fetchValue (const std::map<TString,TString>& keyValueMap,
337  TString key, TString defaultValue)
338 {
339  TString value (fetchValue (keyValueMap, key));
340  if (value == "") {
341  return defaultValue;
342  }
343  return value;
344 }
345 
346 //______________________________________________________________________________
347 template <>
348 bool fetchValue (const std::map<TString,TString>& keyValueMap,
349  TString key, bool defaultValue)
350 {
351  TString value (fetchValue (keyValueMap, key));
352  if (value == "") {
353  return defaultValue;
354  }
355  value.ToUpper ();
356  if (value == "TRUE" || value == "T" || value == "1") {
357  return true;
358  }
359  return false;
360 }
361 
362 //______________________________________________________________________________
363 template <>
364 std::vector<double> fetchValue(const std::map<TString, TString> & keyValueMap,
365  TString key,
366  std::vector<double> defaultValue)
367 {
368  TString parseString (fetchValue (keyValueMap, key));
369  if (parseString == "") {
370  return defaultValue;
371  }
372  parseString.ToUpper ();
373  std::vector<double> values;
374 
375  const TString tokenDelim ("+");
376  TObjArray* tokenStrings = parseString.Tokenize (tokenDelim);
377  TIter nextToken (tokenStrings);
378  TObjString* tokenString = (TObjString*)nextToken ();
379  for (; tokenString != NULL; tokenString = (TObjString*)nextToken ()) {
380  std::stringstream sstr;
381  double currentValue;
382  sstr << tokenString->GetString ().Data ();
383  sstr >> currentValue;
384  values.push_back (currentValue);
385  }
386  return values;
387 }
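// Illustrative usage (editorial addition, not part of the original source):
// given a block that maps "DROPCONFIG" to "0.0+0.5+0.5",
//
//    std::vector<double> drop = fetchValue(block, "DropConfig",
//                                          std::vector<double>());
//
// returns {0.0, 0.5, 0.5}; if the key is absent, the supplied default value
// is returned unchanged. The scalar overloads above behave analogously.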
388 
389 //______________________________________________________________________________
390 void TMVA::MethodDNN::ProcessOptions()
391 {
392  if (IgnoreEventsWithNegWeightsInTraining()) {
393  Log() << kINFO
394  << "Will ignore negative events in training!"
395  << Endl;
396  }
397 
398  //
399  // Set network structure.
400  //
401 
402  fLayout = TMVA::MethodDNN::ParseLayoutString (fLayoutString);
403  size_t inputSize = GetNVariables ();
404  size_t outputSize = (GetNTargets() == 0) ? 1 : GetNTargets();
405 
406  fNet.SetBatchSize(1);
407  fNet.SetInputWidth(inputSize);
408 
409  auto itLayout = std::begin (fLayout);
410  auto itLayoutEnd = std::end (fLayout)-1;
411  for ( ; itLayout != itLayoutEnd; ++itLayout) {
412  fNet.AddLayer((*itLayout).first, (*itLayout).second);
413  }
414  fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
415 
416  //
417  // Loss function and output.
418  //
419 
420  fOutputFunction = EOutputFunction::kSigmoid;
421  if (fAnalysisType == Types::kClassification)
422  {
423  if (fErrorStrategy == "SUMOFSQUARES") {
424  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
425  }
426  if (fErrorStrategy == "CROSSENTROPY") {
427  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
428  }
429  fOutputFunction = EOutputFunction::kSigmoid;
430  } else if (fAnalysisType == Types::kRegression) {
431  if (fErrorStrategy != "SUMOFSQUARES") {
432  Log () << kWARNING << "For regression only SUMOFSQUARES is a valid "
433  << " neural net error function. Setting error function to "
434  << " SUMOFSQUARES now." << Endl;
435  }
436  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
437  fOutputFunction = EOutputFunction::kIdentity;
438  } else if (fAnalysisType == Types::kMulticlass) {
439  if (fErrorStrategy == "SUMOFSQUARES") {
440  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
441  }
442  if (fErrorStrategy == "CROSSENTROPY") {
443  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
444  }
445  if (fErrorStrategy == "MUTUALEXCLUSIVE") {
446  Log () << kFatal << "MUTUALEXCLUSIVE not yet implemented." << Endl;
447  }
448  fOutputFunction = EOutputFunction::kSigmoid;
449  }
450 
451  //
452  // Initialization
453  //
454 
455  if (fWeightInitializationString == "XAVIER") {
456  fWeightInitialization = DNN::EInitialization::kGauss;
457  }
458  else if (fWeightInitializationString == "XAVIERUNIFORM") {
459  fWeightInitialization = DNN::EInitialization::kUniform;
460  }
461  else {
462  fWeightInitialization = DNN::EInitialization::kGauss;
463  }
464 
465  //
466  // Training settings.
467  //
468 
469  KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
470  TString ("|"),
471  TString (","));
472  for (auto& block : strategyKeyValues) {
473  TTrainingSettings settings;
474 
475  settings.convergenceSteps = fetchValue(block, "ConvergenceSteps", 100);
476  settings.batchSize = fetchValue(block, "BatchSize", 30);
477  settings.testInterval = fetchValue(block, "TestRepetitions", 7);
478  settings.weightDecay = fetchValue(block, "WeightDecay", 0.0);
479  settings.learningRate = fetchValue(block, "LearningRate", 1e-5);
480  settings.momentum = fetchValue(block, "Momentum", 0.3);
481  settings.dropoutProbabilities = fetchValue(block, "DropConfig",
482  std::vector<Double_t>());
483 
484  TString regularization = fetchValue(block, "Regularization",
485  TString ("NONE"));
486  if (regularization == "L1") {
487  settings.regularization = DNN::ERegularization::kL1;
488  } else if (regularization == "L2") {
489  settings.regularization = DNN::ERegularization::kL2;
490  }
491 
492  TString strMultithreading = fetchValue(block, "Multithreading",
493  TString ("True"));
494  if (strMultithreading.BeginsWith ("T")) {
495  settings.multithreading = true;
496  } else {
497  settings.multithreading = false;
498  }
499 
500  fTrainingSettings.push_back(settings);
501  }
502 }
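// Example (editorial addition, not part of the original source): a single
// training-strategy block such as
//
//    "LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,"
//    "TestRepetitions=7,WeightDecay=0.0,Regularization=L2,DropConfig=0.0+0.5"
//
// is translated by the loop above into one TTrainingSettings entry with
// learningRate = 0.1, momentum = 0.3, convergenceSteps = 50, batchSize = 30,
// testInterval = 7, weightDecay = 0.0, regularization = kL2 and
// dropoutProbabilities = {0.0, 0.5}. Note that the regularization type is
// looked up under the key "Regularization", whereas the default option string
// and the help text use "Renormalize"; keys not present in a block fall back
// to the defaults passed to fetchValue().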
503 
504 //______________________________________________________________________________
505 void TMVA::MethodDNN::Train()
506 {
507  if (fInteractive && fInteractive->NotInitialized()){
508  std::vector<TString> titles = {"Error on training set", "Error on test set"};
509  fInteractive->Init(titles);
510  // JsMVA progress bar maximum (100%)
511  fIPyMaxIter = 100;
512  }
513 
514  if (fArchitectureString == "GPU") {
515  TrainGpu();
516  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
517  ExitFromTraining();
518  return;
519  } else if (fArchitectureString == "OpenCL") {
520  Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
521  return;
522  } else if (fArchitectureString == "CPU") {
523  TrainCpu<Double_t>();
524  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
525  ExitFromTraining();
526  return;
527  }
528 
529  Log() << kINFO << "Using Standard Implementation.";
530 
531  std::vector<Pattern> trainPattern;
532  std::vector<Pattern> testPattern;
533 
534  const std::vector<TMVA::Event*>& eventCollectionTraining = GetEventCollection (Types::kTraining);
535  const std::vector<TMVA::Event*>& eventCollectionTesting = GetEventCollection (Types::kTesting);
536 
537  for (auto &event : eventCollectionTraining) {
538  const std::vector<Float_t>& values = event->GetValues();
539  if (fAnalysisType == Types::kClassification) {
540  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
541  trainPattern.push_back(Pattern (values.begin(),
542  values.end(),
543  outputValue,
544  event->GetWeight()));
545  trainPattern.back().addInput(1.0);
546  } else {
547  const std::vector<Float_t>& targets = event->GetTargets ();
548  trainPattern.push_back(Pattern(values.begin(),
549  values.end(),
550  targets.begin(),
551  targets.end(),
552  event->GetWeight ()));
553  trainPattern.back ().addInput (1.0); // bias node
554  }
555  }
556 
557  for (auto &event : eventCollectionTesting) {
558  const std::vector<Float_t>& values = event->GetValues();
559  if (fAnalysisType == Types::kClassification) {
560  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
561  testPattern.push_back(Pattern (values.begin(),
562  values.end(),
563  outputValue,
564  event->GetWeight()));
565  testPattern.back().addInput(1.0);
566  } else {
567  const std::vector<Float_t>& targets = event->GetTargets ();
568  testPattern.push_back(Pattern(values.begin(),
569  values.end(),
570  targets.begin(),
571  targets.end(),
572  event->GetWeight ()));
573  testPattern.back ().addInput (1.0); // bias node
574  }
575  }
576 
577  TMVA::DNN::Net net;
578  std::vector<double> weights;
579 
580  net.SetIpythonInteractive(fInteractive, &fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);
581 
582  net.setInputSize(fNet.GetInputWidth() + 1);
583  net.setOutputSize(fNet.GetOutputWidth() + 1);
584 
585  for (size_t i = 0; i < fNet.GetDepth(); i++) {
586  EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();
587  EnumFunction g = EnumFunction::LINEAR;
588  switch(f) {
589  case EActivationFunction::kIdentity: g = EnumFunction::LINEAR; break;
590  case EActivationFunction::kRelu: g = EnumFunction::RELU; break;
591  case EActivationFunction::kSigmoid: g = EnumFunction::SIGMOID; break;
592  case EActivationFunction::kTanh: g = EnumFunction::TANH; break;
593  case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
594  case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
595  case EActivationFunction::kGauss: g = EnumFunction::GAUSS; break;
596  }
597  if (i < fNet.GetDepth() - 1) {
598  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
599  } else {
600  ModeOutputValues h = ModeOutputValues::DIRECT;
601  switch(fOutputFunction) {
602  case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT; break;
603  case EOutputFunction::kSigmoid: h = ModeOutputValues::SIGMOID; break;
604  }
605  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
606  }
607  }
608 
609  switch(fNet.GetLossFunction()) {
610  case ELossFunction::kMeanSquaredError:
611  net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
612  break;
613  case ELossFunction::kCrossEntropy:
614  net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
615  break;
616  }
617 
618  switch(fWeightInitialization) {
619  case EInitialization::kGauss:
620  net.initializeWeights(WeightInitializationStrategy::XAVIER,
621  std::back_inserter(weights));
622  break;
623  case EInitialization::kUniform:
624  net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
625  std::back_inserter(weights));
626  break;
627  default:
628  net.initializeWeights(WeightInitializationStrategy::XAVIER,
629  std::back_inserter(weights));
630  break;
631  }
632 
633 
634  int idxSetting = 0;
635  for (auto s : fTrainingSettings) {
636 
637  EnumRegularization r = EnumRegularization::NONE;
638  switch(s.regularization) {
639  case ERegularization::kNone: r = EnumRegularization::NONE; break;
640  case ERegularization::kL1: r = EnumRegularization::L1; break;
641  case ERegularization::kL2: r = EnumRegularization::L2; break;
642  }
643 
644  Settings * settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
645  s.testInterval, s.weightDecay, r,
646  MinimizerType::fSteepest, s.learningRate,
647  s.momentum, 1, s.multithreading);
648  std::shared_ptr<Settings> ptrSettings(settings);
649  ptrSettings->setMonitoring (0);
650  Log() << kINFO
651  << "Training with learning rate = " << ptrSettings->learningRate ()
652  << ", momentum = " << ptrSettings->momentum ()
653  << ", repetitions = " << ptrSettings->repetitions ()
654  << Endl;
655 
656  ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
657  (idxSetting+1)*100.0/(fSettings.size ()));
658 
659  const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
660  if (!dropConfig.empty ()) {
661  Log () << kINFO << "Drop configuration" << Endl
662  << " drop repetitions = " << ptrSettings->dropRepetitions()
663  << Endl;
664  }
665 
666  int idx = 0;
667  for (auto f : dropConfig) {
668  Log () << kINFO << " Layer " << idx << " = " << f << Endl;
669  ++idx;
670  }
671  Log () << kINFO << Endl;
672 
673  DNN::Steepest minimizer(ptrSettings->learningRate(),
674  ptrSettings->momentum(),
675  ptrSettings->repetitions());
676  net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
677  ptrSettings.reset();
678  Log () << kINFO << Endl;
679  idxSetting++;
680  }
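 // Editorial note (not part of the original source): the loop below unpacks
 // the flat weight vector produced by the legacy NeuralNet training back into
 // the layer-wise weight matrices of fNet, column by column. Bias values are
 // restored only for the first layer, presumably corresponding to the bias
 // input appended to each Pattern above, while the biases of deeper layers
 // are set to zero.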
681  size_t weightIndex = 0;
682  for (size_t l = 0; l < fNet.GetDepth(); l++) {
683  auto & layerWeights = fNet.GetLayer(l).GetWeights();
684  for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
685  for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
686  layerWeights(i,j) = weights[weightIndex];
687  weightIndex++;
688  }
689  }
690  auto & layerBiases = fNet.GetLayer(l).GetBiases();
691  if (l == 0) {
692  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
693  layerBiases(i,0) = weights[weightIndex];
694  weightIndex++;
695  }
696  } else {
697  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
698  layerBiases(i,0) = 0.0;
699  }
700  }
701  }
704 }
705 
706 //______________________________________________________________________________
707 void TMVA::MethodDNN::TrainGpu()
708 {
709 
710 #ifdef DNNCUDA // Included only if DNNCUDA flag is set.
711 
712  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size();
713  size_t nTestSamples = GetEventCollection(Types::kTesting).size();
714 
715  Log() << kINFO << "Start of neural network training on GPU." << Endl;
716 
717  size_t trainingPhase = 1;
718  fNet.Initialize(fWeightInitialization);
719  for (TTrainingSettings & settings : fTrainingSettings) {
720 
721  if (fInteractive){
722  fInteractive->ClearGraphs();
723  }
724 
725  TNet<TCuda<>> net(settings.batchSize, fNet);
726  net.SetWeightDecay(settings.weightDecay);
727  net.SetRegularization(settings.regularization);
728  net.SetDropoutProbabilities(settings.dropoutProbabilities);
729  net.InitializeGradients();
730  auto testNet = net.CreateClone(settings.batchSize);
731 
732  Log() << kINFO << "Training phase " << trainingPhase << " of "
733  << fTrainingSettings.size() << ":" << Endl;
734  trainingPhase++;
735 
736  using DataLoader_t = TDataLoader<TMVAInput_t, TCuda<>>;
737 
738  size_t nThreads = 1;
739  DataLoader_t trainingData(GetEventCollection(Types::kTraining),
740  nTrainingSamples,
741  net.GetBatchSize(),
742  net.GetInputWidth(),
743  net.GetOutputWidth(), nThreads);
744  DataLoader_t testData(GetEventCollection(Types::kTesting),
745  nTestSamples,
746  testNet.GetBatchSize(),
747  net.GetInputWidth(),
748  net.GetOutputWidth(), nThreads);
749  DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
750  settings.convergenceSteps,
751  settings.testInterval);
752 
753  std::vector<TNet<TCuda<>>> nets{};
754  std::vector<TBatch<TCuda<>>> batches{};
755  nets.reserve(nThreads);
756  for (size_t i = 0; i < nThreads; i++) {
757  nets.push_back(net);
758  for (size_t j = 0; j < net.GetDepth(); j++)
759  {
760  auto &masterLayer = net.GetLayer(j);
761  auto &layer = nets.back().GetLayer(j);
762  TCuda<>::Copy(layer.GetWeights(),
763  masterLayer.GetWeights());
764  TCuda<>::Copy(layer.GetBiases(),
765  masterLayer.GetBiases());
766  }
767  }
768 
769  bool converged = false;
770  size_t stepCount = 0;
771  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
772 
773  std::chrono::time_point<std::chrono::system_clock> start, end;
774  start = std::chrono::system_clock::now();
775 
776  Log() << std::setw(10) << "Epoch" << " | "
777  << std::setw(12) << "Train Err."
778  << std::setw(12) << "Test Err."
779  << std::setw(12) << "GFLOP/s"
780  << std::setw(12) << "Conv. Steps" << Endl;
781  std::string separator(62, '-');
782  Log() << separator << Endl;
783 
784  while (!converged)
785  {
786  stepCount++;
787 
788  // Perform minimization steps for a full epoch.
789  trainingData.Shuffle();
790  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
791  batches.clear();
792  for (size_t j = 0; j < nThreads; j++) {
793  batches.reserve(nThreads);
794  batches.push_back(trainingData.GetBatch());
795  }
796  if (settings.momentum > 0.0) {
797  minimizer.StepMomentum(net, nets, batches, settings.momentum);
798  } else {
799  minimizer.Step(net, nets, batches);
800  }
801  }
802 
803  if ((stepCount % minimizer.GetTestInterval()) == 0) {
804 
805  // Compute test error.
806  Double_t testError = 0.0;
807  for (auto batch : testData) {
808  auto inputMatrix = batch.GetInput();
809  auto outputMatrix = batch.GetOutput();
810  testError += testNet.Loss(inputMatrix, outputMatrix);
811  }
812  testError /= (Double_t) (nTestSamples / settings.batchSize);
813 
814  end = std::chrono::system_clock::now();
815 
816  // Compute training error.
817  Double_t trainingError = 0.0;
818  for (auto batch : trainingData) {
819  auto inputMatrix = batch.GetInput();
820  auto outputMatrix = batch.GetOutput();
821  trainingError += net.Loss(inputMatrix, outputMatrix);
822  }
823  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
824 
825  if (fInteractive){
826  fInteractive->AddPoint(stepCount, trainingError, testError);
827  fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
828  if (fExitFromTraining) break;
829  }
830 
831  // Compute numerical throughput.
832  std::chrono::duration<double> elapsed_seconds = end - start;
833  double seconds = elapsed_seconds.count();
834  double nFlops = (double) (settings.testInterval * batchesInEpoch);
835  nFlops *= net.GetNFlops() * 1e-9;
836 
837  converged = minimizer.HasConverged(testError);
838  start = std::chrono::system_clock::now();
839 
840  Log() << std::setw(10) << stepCount << " | "
841  << std::setw(12) << trainingError
842  << std::setw(12) << testError
843  << std::setw(12) << nFlops / seconds
844  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
845  if (converged) {
846  Log() << Endl;
847  }
848  }
849  }
850  for (size_t l = 0; l < net.GetDepth(); l++) {
851  fNet.GetLayer(l).GetWeights() = (TMatrixT<Double_t>) net.GetLayer(l).GetWeights();
852  fNet.GetLayer(l).GetBiases() = (TMatrixT<Double_t>) net.GetLayer(l).GetBiases();
853  }
854  }
855 
856 #else // DNNCUDA flag not set.
857 
858  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
859  "you have CUDA installed and it was successfully "
860  "detected by CMake." << Endl;
861 #endif // DNNCUDA
862 }
863 
864 //______________________________________________________________________________
865 template<typename AFloat>
866 void TMVA::MethodDNN::TrainCpu()
867 {
868 
869 #ifdef DNNCPU // Included only if DNNCPU flag is set.
870 
871  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size();
872  size_t nTestSamples = GetEventCollection(Types::kTesting).size();
873 
874  Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;
875 
875 
876  fNet.Initialize(fWeightInitialization);
877 
878  size_t trainingPhase = 1;
879  for (TTrainingSettings & settings : fTrainingSettings) {
880 
881  if (fInteractive){
882  fInteractive->ClearGraphs();
883  }
884 
885  Log() << "Training phase " << trainingPhase << " of "
886  << fTrainingSettings.size() << ":" << Endl;
887  trainingPhase++;
888 
889  TNet<TCpu<AFloat>> net(settings.batchSize, fNet);
890  net.SetWeightDecay(settings.weightDecay);
891  net.SetRegularization(settings.regularization);
892  net.SetDropoutProbabilities(settings.dropoutProbabilities);
893  net.InitializeGradients();
894  auto testNet = net.CreateClone(settings.batchSize);
895 
896  using DataLoader_t = TDataLoader<TMVAInput_t, TCpu<AFloat>>;
897 
898  size_t nThreads = 1;
899  DataLoader_t trainingData(GetEventCollection(Types::kTraining),
900  nTrainingSamples,
901  net.GetBatchSize(),
902  net.GetInputWidth(),
903  net.GetOutputWidth(), nThreads);
904  DataLoader_t testData(GetEventCollection(Types::kTesting),
905  nTestSamples,
906  testNet.GetBatchSize(),
907  net.GetInputWidth(),
908  net.GetOutputWidth(), nThreads);
909  DNN::TGradientDescent<TCpu<AFloat>> minimizer(settings.learningRate,
910  settings.convergenceSteps,
911  settings.testInterval);
912 
913  std::vector<TNet<TCpu<AFloat>>> nets{};
914  std::vector<TBatch<TCpu<AFloat>>> batches{};
915  nets.reserve(nThreads);
916  for (size_t i = 0; i < nThreads; i++) {
917  nets.push_back(net);
918  for (size_t j = 0; j < net.GetDepth(); j++)
919  {
920  auto &masterLayer = net.GetLayer(j);
921  auto &layer = nets.back().GetLayer(j);
922  TCpu<AFloat>::Copy(layer.GetWeights(),
923  masterLayer.GetWeights());
924  TCpu<AFloat>::Copy(layer.GetBiases(),
925  masterLayer.GetBiases());
926  }
927  }
928 
929  bool converged = false;
930  size_t stepCount = 0;
931  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
932 
933  std::chrono::time_point<std::chrono::system_clock> start, end;
934  start = std::chrono::system_clock::now();
935 
936  Log() << std::setw(10) << "Epoch" << " | "
937  << std::setw(12) << "Train Err."
938  << std::setw(12) << "Test Err."
939  << std::setw(12) << "GFLOP/s"
940  << std::setw(12) << "Conv. Steps" << Endl;
941  std::string separator(62, '-');
942  Log() << separator << Endl;
943 
944  while (!converged)
945  {
946  stepCount++;
947  // Perform minimization steps for a full epoch.
948  trainingData.Shuffle();
949  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
950  batches.clear();
951  for (size_t j = 0; j < nThreads; j++) {
952  batches.reserve(nThreads);
953  batches.push_back(trainingData.GetBatch());
954  }
955  if (settings.momentum > 0.0) {
956  minimizer.StepMomentum(net, nets, batches, settings.momentum);
957  } else {
958  minimizer.Step(net, nets, batches);
959  }
960  }
961 
962  if ((stepCount % minimizer.GetTestInterval()) == 0) {
963 
964  // Compute test error.
965  AFloat testError = 0.0;
966  for (auto batch : testData) {
967  auto inputMatrix = batch.GetInput();
968  auto outputMatrix = batch.GetOutput();
969  testError += testNet.Loss(inputMatrix, outputMatrix);
970  }
971  testError /= (Double_t) (nTestSamples / settings.batchSize);
972 
973  end = std::chrono::system_clock::now();
974 
975  // Compute training error.
976  AFloat trainingError = 0.0;
977  for (auto batch : trainingData) {
978  auto inputMatrix = batch.GetInput();
979  auto outputMatrix = batch.GetOutput();
980  trainingError += net.Loss(inputMatrix, outputMatrix);
981  }
982  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
983 
984  if (fInteractive){
985  fInteractive->AddPoint(stepCount, trainingError, testError);
986  fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
987  if (fExitFromTraining) break;
988  }
989 
990  // Compute numerical throughput.
991  std::chrono::duration<double> elapsed_seconds = end - start;
992  double seconds = elapsed_seconds.count();
993  double nFlops = (double) (settings.testInterval * batchesInEpoch);
994  nFlops *= net.GetNFlops() * 1e-9;
995 
996  converged = minimizer.HasConverged(testError);
997  start = std::chrono::system_clock::now();
998 
999  Log() << std::setw(10) << stepCount << " | "
1000  << std::setw(12) << trainingError
1001  << std::setw(12) << testError
1002  << std::setw(12) << nFlops / seconds
1003  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1004  if (converged) {
1005  Log() << Endl;
1006  }
1007  }
1008  }
1009 
1010 
1011  for (size_t l = 0; l < net.GetDepth(); l++) {
1012  auto & layer = fNet.GetLayer(l);
1013  layer.GetWeights() = (TMatrixT<Double_t>) net.GetLayer(l).GetWeights();
1014  layer.GetBiases() = (TMatrixT<Double_t>) net.GetLayer(l).GetBiases();
1015  }
1016  }
1017 
1018 #else // DNNCPU flag not set.
1019  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1020  "you have a BLAS implementation and that it was successfully "
1021  "detected by CMake, and that the imt CMake flag is set." << Endl;
1022 #endif // DNNCPU
1023 }
1024 
1025 //______________________________________________________________________________
1026 Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUpper*/ )
1027 {
1028  size_t nVariables = GetEvent()->GetNVariables();
1029  Matrix_t X(1, nVariables);
1030  Matrix_t YHat(1, 1);
1031 
1032  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1033  for (size_t i = 0; i < nVariables; i++) {
1034  X(0,i) = inputValues[i];
1035  }
1036 
1037  fNet.Prediction(YHat, X, fOutputFunction);
1038  return YHat(0,0);
1039 }
1040 
1041 //______________________________________________________________________________
1042 const std::vector<Float_t> &TMVA::MethodDNN::GetRegressionValues()
1043 {
1044  size_t nVariables = GetEvent()->GetNVariables();
1045  Matrix_t X(1, nVariables);
1046 
1047  const Event *ev = GetEvent();
1048  const std::vector<Float_t>& inputValues = ev->GetValues();
1049  for (size_t i = 0; i < nVariables; i++) {
1050  X(0,i) = inputValues[i];
1051  }
1052 
1053  size_t nTargets = std::max(1u, ev->GetNTargets());
1054  Matrix_t YHat(1, nTargets);
1055  std::vector<Float_t> output(nTargets);
1056  auto net = fNet.CreateClone(1);
1057  net.Prediction(YHat, X, fOutputFunction);
1058 
1059  for (size_t i = 0; i < nTargets; i++)
1060  output[i] = YHat(0, i);
1061 
1062  if (fRegressionReturnVal == NULL) {
1063  fRegressionReturnVal = new std::vector<Float_t>();
1064  }
1065  fRegressionReturnVal->clear();
1066 
1067  Event * evT = new Event(*ev);
1068  for (size_t i = 0; i < nTargets; ++i) {
1069  evT->SetTarget(i, output[i]);
1070  }
1071 
1072  const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1073  for (size_t i = 0; i < nTargets; ++i) {
1074  fRegressionReturnVal->push_back(evT2->GetTarget(i));
1075  }
1076  delete evT;
1077  return *fRegressionReturnVal;
1078 }
1079 
1080 const std::vector<Float_t> &TMVA::MethodDNN::GetMulticlassValues()
1081 {
1082  Log() << kFATAL << "ERROR: Multiclass classification not yet implemented."
1083  << Endl;
1084  return *fMulticlassReturnVal;
1085 }
1086 //______________________________________________________________________________
1087 void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const
1088 {
1089  void* nn = gTools().xmlengine().NewChild(parent, 0, "Weights");
1090  Int_t inputWidth = fNet.GetInputWidth();
1091  Int_t depth = fNet.GetDepth();
1092  char lossFunction = static_cast<char>(fNet.GetLossFunction());
1093  gTools().xmlengine().NewAttr(nn, 0, "InputWidth",
1094  gTools().StringFromInt(inputWidth));
1095  gTools().xmlengine().NewAttr(nn, 0, "Depth", gTools().StringFromInt(depth));
1096  gTools().xmlengine().NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1097  gTools().xmlengine().NewAttr(nn, 0, "OutputFunction",
1098  TString(static_cast<char>(fOutputFunction)));
1099 
1100  for (Int_t i = 0; i < depth; i++) {
1101  const auto& layer = fNet.GetLayer(i);
1102  auto layerxml = gTools().xmlengine().NewChild(nn, 0, "Layer");
1103  int activationFunction = static_cast<int>(layer.GetActivationFunction());
1104  gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
1105  TString::Itoa(activationFunction, 10));
1106  WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
1107  WriteMatrixXML(layerxml, "Biases", layer.GetBiases());
1108  }
1109 }
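// Sketch of the XML written above (editorial addition, not part of the
// original source; attribute values are examples only, and the exact matrix
// payload format is defined by WriteMatrixXML() in MethodDNN.h):
//
//    <Weights InputWidth="4" Depth="3" LossFunction="C" OutputFunction="S">
//       <Layer ActivationFunction="2">
//          <Weights rows="..." ...> ... </Weights>
//          <Biases rows="..." ...> ... </Biases>
//       </Layer>
//       ...
//    </Weights>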
1110 
1111 //______________________________________________________________________________
1112 void TMVA::MethodDNN::ReadWeightsFromXML(void* rootXML)
1113 {
1114  auto netXML = gTools().GetChild(rootXML, "Weights");
1115  if (!netXML){
1116  netXML = rootXML;
1117  }
1118 
1119  fNet.Clear();
1120  fNet.SetBatchSize(1);
1121 
1122  size_t inputWidth, depth;
1123  gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1124  gTools().ReadAttr(netXML, "Depth", depth);
1125  char lossFunctionChar;
1126  gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1127  char outputFunctionChar;
1128  gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1129 
1130  fNet.SetInputWidth(inputWidth);
1131  fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
1132  fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1133 
1134  size_t previousWidth = inputWidth;
1135  auto layerXML = gTools().xmlengine().GetChild(netXML, "Layer");
1136  for (size_t i = 0; i < depth; i++) {
1137  TString fString;
1138  EActivationFunction f;
1139 
1140  // Read activation function.
1141  gTools().ReadAttr(layerXML, "ActivationFunction", fString);
1142  f = static_cast<EActivationFunction>(fString.Atoi());
1143 
1144  // Read number of neurons.
1145  size_t width;
1146  auto matrixXML = gTools().GetChild(layerXML, "Weights");
1147  gTools().ReadAttr(matrixXML, "rows", width);
1148 
1149  fNet.AddLayer(width, f);
1150  TMatrixT<Double_t> weights(width, previousWidth);
1151  TMatrixT<Double_t> biases(width, 1);
1152  ReadMatrixXML(layerXML, "Weights", weights);
1153  ReadMatrixXML(layerXML, "Biases", biases);
1154  fNet.GetLayer(i).GetWeights() = weights;
1155  fNet.GetLayer(i).GetBiases() = biases;
1156 
1157  layerXML = gTools().GetNextChild(layerXML);
1158  previousWidth = width;
1159  }
1160 }
1161 
1162 //______________________________________________________________________________
1163 void TMVA::MethodDNN::ReadWeightsFromStream( std::istream & /*istr*/)
1164 {
1165 }
1166 
1167 //______________________________________________________________________________
1168 const TMVA::Ranking* TMVA::MethodDNN::CreateRanking()
1169 {
1170  fRanking = new Ranking( GetName(), "Importance" );
1171  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1172  fRanking->AddRank( Rank( GetInputLabel(ivar), 1.0));
1173  }
1174  return fRanking;
1175 }
1176 
1177 //______________________________________________________________________________
1178 void TMVA::MethodDNN::MakeClassSpecific( std::ostream& /*fout*/,
1179  const TString& /*className*/ ) const
1180 {
1181 }
1182 
1183 //______________________________________________________________________________
1184 void TMVA::MethodDNN::GetHelpMessage() const
1185 {
1186  // get help message text
1187  //
1188  // typical length of text line:
1189  // "|--------------------------------------------------------------|"
1190  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1191  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1192 
1193  Log() << Endl;
1194  Log() << col << "--- Short description:" << colres << Endl;
1195  Log() << Endl;
1196  Log() << "The DNN neural network is a feedforward" << Endl;
1197  Log() << "multilayer perceptron impementation. The DNN has a user-" << Endl;
1198  Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
1199  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1200  Log() << "signal and one background, regression or multiclass). " << Endl;
1201  Log() << Endl;
1202  Log() << col << "--- Performance optimisation:" << colres << Endl;
1203  Log() << Endl;
1204 
1205  const char* txt = "The DNN supports various options to improve performance in terms of training speed and \n \
1206 reduction of overfitting: \n \
1207 \n \
1208  - different training settings can be stacked, such that the initial training \n\
1209  is done with a large learning rate and a large drop out fraction whilst \n \
1210  in a later stage learning rate and drop out can be reduced. \n \
1211  - drop out \n \
1212  [recommended: \n \
1213  initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
1214  later training stage: 0.1 or 0.0 for all layers \n \
1215  final training stage: 0.0] \n \
1216  Drop out is a technique where at each training cycle a fraction of arbitrary \n \
1217  nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
1218  - L1 and L2 regularization are available \n \
1219  - Minibatches \n \
1220  [recommended 10 - 150] \n \
1221  Arbitrary mini-batch sizes can be chosen. \n \
1222  - Multithreading \n \
1223  [recommended: True] \n \
1224  Multithreading can be turned on. The minibatches are distributed to the available \n \
1225  cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
1226  \n \
1227  Options: \n \
1228  \"Layout\": \n \
1229  - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
1230  - meaning: \n \
1231  . two hidden layers (separated by \",\") \n \
1232  . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
1233  . the activation function for the output layer is LINEAR \n \
1234  . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
1235  . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
1236  . the number of nodes in the output layer is determined by the number of output nodes \n \
1237  and can therefore not be chosen freely. \n \
1238  \n \
1239  \"ErrorStrategy\": \n \
1240  - SUMOFSQUARES \n \
1241  The error of the neural net is determined by a sum-of-squares error function \n \
1242  For regression, this is the only possible choice. \n \
1243  - CROSSENTROPY \n \
1244  The error of the neural net is determined by a cross entropy function. The \n \
1245  output values are automatically (internally) transformed into probabilities \n \
1246  using a sigmoid function. \n \
1247  For signal/background classification this is the default choice. \n \
1248  For multiclass using cross entropy more than one or no output classes \n \
1249  can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n \
1250  A and C is true, Event 2: C is true, ...) \n \
1251  - MUTUALEXCLUSIVE \n \
1252  In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
1253  \n \
1254  \"WeightInitialization\" \n \
1255  - XAVIER \n \
1256  [recommended] \n \
1257  \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
1258  such that the variance of the values of the nodes is preserved for each layer. \n \
1259  - XAVIERUNIFORM \n \
1260  The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
1261  - LAYERSIZE \n \
1262  Random values scaled by the layer size \n \
1263  \n \
1264  \"TrainingStrategy\" \n \
1265  - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1266  - explanation: two stacked training settings separated by \"|\" \n \
1267  . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1268  . second training setting : \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1269  . LearningRate : \n \
1270  - recommended for classification: 0.1 initially, 1e-4 later \n \
1271  - recommended for regression: 1e-4 and less \n \
1272  . Momentum : \n \
1273  preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
1274  . Repetitions : \n \
1275  train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
1276  . ConvergenceSteps : \n \
1277  Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n \
1278  of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n \
1279  cycle the test samples are evaluated and thus the convergence is checked) \n \
1280  . BatchSize \n \
1281  Size of the mini-batches. \n \
1282  . TestRepetitions \n \
1283  Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n \
1284  . WeightDecay \n \
1285  If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
1286  . Renormalize \n \
1287  NONE, L1 (|w|) or L2 (w^2) \n \
1288  . DropConfig \n \
1289  Drop a fraction of arbitrary nodes of each of the layers according to the values given \n \
1290  in the DropConfig. \n \
1291  [example: DropConfig=0.0+0.5+0.3 \n \
1292  meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
1293  in layer 2 \n \
1294  recommended: leave all the nodes turned on for the input layer (layer 0) \n \
1295  turn off half of the nodes in later layers for the initial training; leave all nodes \n \
1296  turned on (0.0) in later training stages] \n \
1297  . DropRepetitions \n \
1298  Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n \
1299  [recommended : 1] \n \
1300  . Multithreading \n \
1301  turn on multithreading [recommended: True] \n \
1302  \n";
1303  Log () << txt << Endl;
1304 }
1305 
1306 } // namespace TMVA