Logo ROOT  
Reference Guide
MethodDNN.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Peter Speckmayer
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodDNN *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * A neural network implementation *
12 * *
13 * Authors (alphabetical): *
14 * Simon Pfreundschuh <s.pfreundschuh@gmail.com> - CERN, Switzerland *
15 * Peter Speckmayer <peter.speckmayer@gmx.ch> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28/*! \class TMVA::MethodDNN
29\ingroup TMVA
30Deep Neural Network Implementation.
31*/
32
33#include "TMVA/MethodDNN.h"
34
35#include "TString.h"
36#include "TFormula.h"
37#include "TObjString.h"
38
40#include "TMVA/Configurable.h"
41#include "TMVA/IMethod.h"
42#include "TMVA/MsgLogger.h"
43#include "TMVA/MethodBase.h"
44#include "TMVA/Timer.h"
45#include "TMVA/Types.h"
46#include "TMVA/Tools.h"
47#include "TMVA/Config.h"
48#include "TMVA/Ranking.h"
49
50#include "TMVA/DNN/Net.h"
52
53#include "TMVA/NeuralNet.h"
54#include "TMVA/Monitoring.h"
55
56#ifdef R__HAS_TMVACPU
58#endif
59#ifdef R__HAS_TMVAGPU
61#endif
62
63#include <algorithm>
64#include <iostream>
65#include <string>
66#include <iomanip>
67
69
71
72namespace TMVA
73{
74 using namespace DNN;
75
76 ////////////////////////////////////////////////////////////////////////////////
77 /// standard constructor
78
79 TMVA::MethodDNN::MethodDNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData,
80 const TString &theOption)
81 : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption), fWeightInitialization(), fOutputFunction(),
82 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
83 fArchitectureString(), fTrainingSettings(), fResume(false), fSettings()
84 {
85}
86
87////////////////////////////////////////////////////////////////////////////////
88/// constructor from a weight file
89
90TMVA::MethodDNN::MethodDNN(DataSetInfo& theData,
91 const TString& theWeightFile)
92 : MethodBase( Types::kDNN, theData, theWeightFile),
93 fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
94 fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
95 fTrainingSettings(), fResume(false), fSettings()
96{
97 fWeightInitialization = DNN::EInitialization::kGauss;
98 fOutputFunction = DNN::EOutputFunction::kSigmoid;
99}
100
101////////////////////////////////////////////////////////////////////////////////
102/// destructor
103
105{
108}
109
110////////////////////////////////////////////////////////////////////////////////
/// The DNN can handle binary classification (exactly two classes),
/// multiclass classification and regression.
113
115 UInt_t numberClasses,
116 UInt_t /*numberTargets*/ )
117{
118 if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
119 if (type == Types::kMulticlass ) return kTRUE;
120 if (type == Types::kRegression ) return kTRUE;
121
122 return kFALSE;
123}
124
125////////////////////////////////////////////////////////////////////////////////
126/// default initializations
127
129 Log() << kWARNING
130 << "MethodDNN is deprecated and it will be removed in future ROOT version. "
131 "Please use MethodDL ( TMVA::kDL)"
132 << Endl;
133
134}
135
136////////////////////////////////////////////////////////////////////////////////
137/// Options to be set in the option string:
138///
139/// - LearningRate <float> DNN learning rate parameter.
140/// - DecayRate <float> Decay rate for learning parameter.
141/// - TestRate <int> Period of validation set error computation.
142/// - BatchSize <int> Number of event per batch.
143///
144/// - ValidationSize <string> How many events to use for validation. "0.2"
145/// or "20%" indicates that a fifth of the
146/// training data should be used. "100"
147/// indicates that 100 events should be used.
148
150{
151
152 DeclareOptionRef(fLayoutString="SOFTSIGN|(N+100)*2,LINEAR",
153 "Layout",
154 "Layout of the network.");
155
156 DeclareOptionRef(fValidationSize = "20%", "ValidationSize",
157 "Part of the training data to use for "
158 "validation. Specify as 0.2 or 20% to use a "
159 "fifth of the data set as validation set. "
160 "Specify as 100 to use exactly 100 events. "
161 "(Default: 20%)");
162
163 DeclareOptionRef(fErrorStrategy="CROSSENTROPY",
164 "ErrorStrategy",
165 "Loss function: Mean squared error (regression)"
166 " or cross entropy (binary classification).");
167 AddPreDefVal(TString("CROSSENTROPY"));
168 AddPreDefVal(TString("SUMOFSQUARES"));
169 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
170
171 DeclareOptionRef(fWeightInitializationString="XAVIER",
172 "WeightInitialization",
173 "Weight initialization strategy");
174 AddPreDefVal(TString("XAVIER"));
175 AddPreDefVal(TString("XAVIERUNIFORM"));
176
177 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
178 AddPreDefVal(TString("STANDARD"));
179 AddPreDefVal(TString("CPU"));
180 AddPreDefVal(TString("GPU"));
181 AddPreDefVal(TString("OPENCL"));
182
183 DeclareOptionRef(
184 fTrainingStrategyString = "LearningRate=1e-1,"
185 "Momentum=0.3,"
186 "Repetitions=3,"
187 "ConvergenceSteps=50,"
188 "BatchSize=30,"
189 "TestRepetitions=7,"
190 "WeightDecay=0.0,"
191 "Renormalize=L2,"
192 "DropConfig=0.0,"
193 "DropRepetitions=5|LearningRate=1e-4,"
194 "Momentum=0.3,"
195 "Repetitions=3,"
196 "ConvergenceSteps=50,"
197 "BatchSize=20,"
198 "TestRepetitions=7,"
199 "WeightDecay=0.001,"
200 "Renormalize=L2,"
201 "DropConfig=0.0+0.5+0.5,"
202 "DropRepetitions=5,"
203 "Multithreading=True",
204 "TrainingStrategy",
205 "Defines the training strategies.");
206}
207
208////////////////////////////////////////////////////////////////////////////////
209/// parse layout specification string and return a vector, each entry
210/// containing the number of neurons to go in each successive layer
211
213 -> LayoutVector_t
214{
215 LayoutVector_t layout;
216 const TString layerDelimiter(",");
217 const TString subDelimiter("|");
218
219 const size_t inputSize = GetNvar();
220
221 TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
222 TIter nextLayer (layerStrings);
223 TObjString* layerString = (TObjString*)nextLayer ();
224
225 for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
226 int numNodes = 0;
228
229 TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
230 TIter nextToken (subStrings);
231 TObjString* token = (TObjString *) nextToken();
232 int idxToken = 0;
233 for (; token != nullptr; token = (TObjString *) nextToken()) {
234 switch (idxToken)
235 {
236 case 0:
237 {
238 TString strActFnc (token->GetString ());
239 if (strActFnc == "RELU") {
240 activationFunction = DNN::EActivationFunction::kRelu;
241 } else if (strActFnc == "TANH") {
242 activationFunction = DNN::EActivationFunction::kTanh;
243 } else if (strActFnc == "SYMMRELU") {
244 activationFunction = DNN::EActivationFunction::kSymmRelu;
245 } else if (strActFnc == "SOFTSIGN") {
246 activationFunction = DNN::EActivationFunction::kSoftSign;
247 } else if (strActFnc == "SIGMOID") {
248 activationFunction = DNN::EActivationFunction::kSigmoid;
249 } else if (strActFnc == "LINEAR") {
250 activationFunction = DNN::EActivationFunction::kIdentity;
251 } else if (strActFnc == "GAUSS") {
252 activationFunction = DNN::EActivationFunction::kGauss;
253 }
254 }
255 break;
256 case 1: // number of nodes
257 {
258 TString strNumNodes (token->GetString ());
259 TString strN ("x");
260 strNumNodes.ReplaceAll ("N", strN);
261 strNumNodes.ReplaceAll ("n", strN);
262 TFormula fml ("tmp",strNumNodes);
263 numNodes = fml.Eval (inputSize);
264 }
265 break;
266 }
267 ++idxToken;
268 }
269 layout.push_back(std::make_pair(numNodes, activationFunction));
270 }
271 return layout;
272}
273
274////////////////////////////////////////////////////////////////////////////////
275/// parse key value pairs in blocks -> return vector of blocks with map of key value pairs
276
278 TString blockDelim,
279 TString tokenDelim)
280 -> KeyValueVector_t
281{
282 KeyValueVector_t blockKeyValues;
283 const TString keyValueDelim ("=");
284
285 TObjArray* blockStrings = parseString.Tokenize (blockDelim);
286 TIter nextBlock (blockStrings);
287 TObjString* blockString = (TObjString *) nextBlock();
288
289 for (; blockString != nullptr; blockString = (TObjString *) nextBlock())
290 {
291 blockKeyValues.push_back (std::map<TString,TString>());
292 std::map<TString,TString>& currentBlock = blockKeyValues.back ();
293
294 TObjArray* subStrings = blockString->GetString ().Tokenize (tokenDelim);
295 TIter nextToken (subStrings);
296 TObjString* token = (TObjString*)nextToken ();
297
298 for (; token != nullptr; token = (TObjString *)nextToken())
299 {
300 TString strKeyValue (token->GetString ());
301 int delimPos = strKeyValue.First (keyValueDelim.Data ());
302 if (delimPos <= 0)
303 continue;
304
305 TString strKey = TString (strKeyValue (0, delimPos));
306 strKey.ToUpper();
307 TString strValue = TString (strKeyValue (delimPos+1, strKeyValue.Length ()));
308
309 strKey.Strip (TString::kBoth, ' ');
310 strValue.Strip (TString::kBoth, ' ');
311
312 currentBlock.insert (std::make_pair (strKey, strValue));
313 }
314 }
315 return blockKeyValues;
316}
317
318////////////////////////////////////////////////////////////////////////////////
319
320TString fetchValue (const std::map<TString, TString>& keyValueMap, TString key)
321{
322 key.ToUpper ();
323 std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
324 if (it == keyValueMap.end()) {
325 return TString ("");
326 }
327 return it->second;
328}
329
330////////////////////////////////////////////////////////////////////////////////
331
332template <typename T>
333T fetchValue(const std::map<TString,TString>& keyValueMap,
334 TString key,
335 T defaultValue);
336
337////////////////////////////////////////////////////////////////////////////////
338
339template <>
340int fetchValue(const std::map<TString,TString>& keyValueMap,
341 TString key,
342 int defaultValue)
343{
344 TString value (fetchValue (keyValueMap, key));
345 if (value == "") {
346 return defaultValue;
347 }
348 return value.Atoi ();
349}
350
351////////////////////////////////////////////////////////////////////////////////
352
353template <>
354double fetchValue (const std::map<TString,TString>& keyValueMap,
355 TString key, double defaultValue)
356{
357 TString value (fetchValue (keyValueMap, key));
358 if (value == "") {
359 return defaultValue;
360 }
361 return value.Atof ();
362}
363
364////////////////////////////////////////////////////////////////////////////////
365
366template <>
367TString fetchValue (const std::map<TString,TString>& keyValueMap,
368 TString key, TString defaultValue)
369{
370 TString value (fetchValue (keyValueMap, key));
371 if (value == "") {
372 return defaultValue;
373 }
374 return value;
375}
376
377////////////////////////////////////////////////////////////////////////////////
378
379template <>
380bool fetchValue (const std::map<TString,TString>& keyValueMap,
381 TString key, bool defaultValue)
382{
383 TString value (fetchValue (keyValueMap, key));
384 if (value == "") {
385 return defaultValue;
386 }
387 value.ToUpper ();
388 if (value == "TRUE" || value == "T" || value == "1") {
389 return true;
390 }
391 return false;
392}
393
394////////////////////////////////////////////////////////////////////////////////
395
396template <>
397std::vector<double> fetchValue(const std::map<TString, TString> & keyValueMap,
398 TString key,
399 std::vector<double> defaultValue)
400{
401 TString parseString (fetchValue (keyValueMap, key));
402 if (parseString == "") {
403 return defaultValue;
404 }
405 parseString.ToUpper ();
406 std::vector<double> values;
407
408 const TString tokenDelim ("+");
409 TObjArray* tokenStrings = parseString.Tokenize (tokenDelim);
410 TIter nextToken (tokenStrings);
411 TObjString* tokenString = (TObjString*)nextToken ();
412 for (; tokenString != NULL; tokenString = (TObjString*)nextToken ()) {
413 std::stringstream sstr;
414 double currentValue;
415 sstr << tokenString->GetString ().Data ();
416 sstr >> currentValue;
417 values.push_back (currentValue);
418 }
419 return values;
420}
421
422////////////////////////////////////////////////////////////////////////////////
423
425{
426 if (IgnoreEventsWithNegWeightsInTraining()) {
427 Log() << kINFO
428 << "Will ignore negative events in training!"
429 << Endl;
430 }
431
432 if (fArchitectureString == "STANDARD") {
433 Log() << kERROR << "The STANDARD architecture has been deprecated. "
434 "Please use Architecture=CPU or Architecture=CPU."
435 "See the TMVA Users' Guide for instructions if you "
436 "encounter problems."
437 << Endl;
438 Log() << kFATAL << "The STANDARD architecture has been deprecated. "
439 "Please use Architecture=CPU or Architecture=CPU."
440 "See the TMVA Users' Guide for instructions if you "
441 "encounter problems."
442 << Endl;
443 }
444
445 if (fArchitectureString == "OPENCL") {
446 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
447 "Please use Architecture=CPU or Architecture=CPU for the "
448 "time being. See the TMVA Users' Guide for instructions "
449 "if you encounter problems."
450 << Endl;
451 Log() << kFATAL << "The OPENCL architecture has not been implemented yet. "
452 "Please use Architecture=CPU or Architecture=CPU for the "
453 "time being. See the TMVA Users' Guide for instructions "
454 "if you encounter problems."
455 << Endl;
456 }
457
458 if (fArchitectureString == "GPU") {
459#ifndef DNNCUDA // Included only if DNNCUDA flag is _not_ set.
460 Log() << kERROR << "CUDA backend not enabled. Please make sure "
461 "you have CUDA installed and it was successfully "
462 "detected by CMAKE."
463 << Endl;
464 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
465 "you have CUDA installed and it was successfully "
466 "detected by CMAKE."
467 << Endl;
468#endif // DNNCUDA
469 }
470
471 if (fArchitectureString == "CPU") {
472#ifndef DNNCPU // Included only if DNNCPU flag is _not_ set.
473 Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
474 "you have a BLAS implementation and it was successfully "
475 "detected by CMake as well that the imt CMake flag is set."
476 << Endl;
477 Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
478 "you have a BLAS implementation and it was successfully "
479 "detected by CMake as well that the imt CMake flag is set."
480 << Endl;
481#endif // DNNCPU
482 }
483
484 //
485 // Set network structure.
486 //
487
488 fLayout = TMVA::MethodDNN::ParseLayoutString (fLayoutString);
489 size_t inputSize = GetNVariables ();
490 size_t outputSize = 1;
491 if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
492 outputSize = GetNTargets();
493 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
494 outputSize = DataInfo().GetNClasses();
495 }
496
497 fNet.SetBatchSize(1);
498 fNet.SetInputWidth(inputSize);
499
500 auto itLayout = std::begin (fLayout);
501 auto itLayoutEnd = std::end (fLayout)-1;
502 for ( ; itLayout != itLayoutEnd; ++itLayout) {
503 fNet.AddLayer((*itLayout).first, (*itLayout).second);
504 }
505 fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
506
507 //
508 // Loss function and output.
509 //
510
511 fOutputFunction = EOutputFunction::kSigmoid;
512 if (fAnalysisType == Types::kClassification)
513 {
514 if (fErrorStrategy == "SUMOFSQUARES") {
515 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
516 }
517 if (fErrorStrategy == "CROSSENTROPY") {
518 fNet.SetLossFunction(ELossFunction::kCrossEntropy);
519 }
520 fOutputFunction = EOutputFunction::kSigmoid;
521 } else if (fAnalysisType == Types::kRegression) {
522 if (fErrorStrategy != "SUMOFSQUARES") {
523 Log () << kWARNING << "For regression only SUMOFSQUARES is a valid "
524 << " neural net error function. Setting error function to "
525 << " SUMOFSQUARES now." << Endl;
526 }
527 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
528 fOutputFunction = EOutputFunction::kIdentity;
529 } else if (fAnalysisType == Types::kMulticlass) {
530 if (fErrorStrategy == "SUMOFSQUARES") {
531 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
532 }
533 if (fErrorStrategy == "CROSSENTROPY") {
534 fNet.SetLossFunction(ELossFunction::kCrossEntropy);
535 }
536 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
537 fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
538 }
539 fOutputFunction = EOutputFunction::kSoftmax;
540 }
541
542 //
543 // Initialization
544 //
545
546 if (fWeightInitializationString == "XAVIER") {
547 fWeightInitialization = DNN::EInitialization::kGauss;
548 }
549 else if (fWeightInitializationString == "XAVIERUNIFORM") {
550 fWeightInitialization = DNN::EInitialization::kUniform;
551 }
552 else {
553 fWeightInitialization = DNN::EInitialization::kGauss;
554 }
555
556 //
557 // Training settings.
558 //
559
560 // Force validation of the ValidationSize option
561 GetNumValidationSamples();
562
563 KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
564 TString ("|"),
565 TString (","));
566
567 std::cout << "Parsed Training DNN string " << fTrainingStrategyString << std::endl;
568 std::cout << "STring has size " << strategyKeyValues.size() << std::endl;
569 for (auto& block : strategyKeyValues) {
570 TTrainingSettings settings;
571
572 settings.convergenceSteps = fetchValue(block, "ConvergenceSteps", 100);
573 settings.batchSize = fetchValue(block, "BatchSize", 30);
574 settings.testInterval = fetchValue(block, "TestRepetitions", 7);
575 settings.weightDecay = fetchValue(block, "WeightDecay", 0.0);
576 settings.learningRate = fetchValue(block, "LearningRate", 1e-5);
577 settings.momentum = fetchValue(block, "Momentum", 0.3);
578 settings.dropoutProbabilities = fetchValue(block, "DropConfig",
579 std::vector<Double_t>());
580
581 TString regularization = fetchValue(block, "Regularization",
582 TString ("NONE"));
583 if (regularization == "L1") {
585 } else if (regularization == "L2") {
587 } else {
589 }
590
591 TString strMultithreading = fetchValue(block, "Multithreading",
592 TString ("True"));
593 if (strMultithreading.BeginsWith ("T")) {
594 settings.multithreading = true;
595 } else {
596 settings.multithreading = false;
597 }
598
599 fTrainingSettings.push_back(settings);
600 }
601}
602
603////////////////////////////////////////////////////////////////////////////////
604/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
605/// 100 etc.
606/// - 20% and 0.2 selects 20% of the training set as validation data.
607/// - 100 selects 100 events as the validation data.
608///
609/// @return number of samples in validation set
610///
611
613{
614 Int_t nValidationSamples = 0;
615 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
616
617 // Parsing + Validation
618 // --------------------
619 if (fValidationSize.EndsWith("%")) {
620 // Relative spec. format 20%
621 TString intValStr = TString(fValidationSize.Strip(TString::kTrailing, '%'));
622
623 if (intValStr.IsFloat()) {
624 Double_t valSizeAsDouble = fValidationSize.Atof() / 100.0;
625 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
626 } else {
627 Log() << kFATAL << "Cannot parse number \"" << fValidationSize
628 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
629 }
630 } else if (fValidationSize.IsFloat()) {
631 Double_t valSizeAsDouble = fValidationSize.Atof();
632
633 if (valSizeAsDouble < 1.0) {
634 // Relative spec. format 0.2
635 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
636 } else {
637 // Absolute spec format 100 or 100.0
638 nValidationSamples = valSizeAsDouble;
639 }
640 } else {
641 Log() << kFATAL << "Cannot parse number \"" << fValidationSize << "\". Expected string like \"0.2\" or \"100\"."
642 << Endl;
643 }
644
645 // Value validation
646 // ----------------
647 if (nValidationSamples < 0) {
648 Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is negative." << Endl;
649 }
650
651 if (nValidationSamples == 0) {
652 Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is zero." << Endl;
653 }
654
655 if (nValidationSamples >= (Int_t)trainingSetSize) {
656 Log() << kFATAL << "Validation size \"" << fValidationSize
657 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
658 }
659
660 return nValidationSamples;
661}
662
663////////////////////////////////////////////////////////////////////////////////
664
666{
667 if (fInteractive && fInteractive->NotInitialized()){
668 std::vector<TString> titles = {"Error on training set", "Error on test set"};
669 fInteractive->Init(titles);
670 // JsMVA progress bar maximum (100%)
671 fIPyMaxIter = 100;
672 }
673
674 for (TTrainingSettings & settings : fTrainingSettings) {
675 size_t nValidationSamples = GetNumValidationSamples();
676 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
677 size_t nTestSamples = nValidationSamples;
678
679 if (nTrainingSamples < settings.batchSize ||
680 nValidationSamples < settings.batchSize ||
681 nTestSamples < settings.batchSize) {
682 Log() << kFATAL << "Number of samples in the datasets are train: "
683 << nTrainingSamples << " valid: " << nValidationSamples
684 << " test: " << nTestSamples << ". "
685 << "One of these is smaller than the batch size of "
686 << settings.batchSize << ". Please increase the batch"
687 << " size to be at least the same size as the smallest"
688 << " of these values." << Endl;
689 }
690 }
691
692 if (fArchitectureString == "GPU") {
693 TrainGpu();
694 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
695 ExitFromTraining();
696 return;
697 } else if (fArchitectureString == "OpenCL") {
698 Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
699 return;
700 } else if (fArchitectureString == "CPU") {
701 TrainCpu();
702 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
703 ExitFromTraining();
704 return;
705 }
706
707 Log() << kINFO << "Using Standard Implementation.";
708
709 std::vector<Pattern> trainPattern;
710 std::vector<Pattern> testPattern;
711
712 size_t nValidationSamples = GetNumValidationSamples();
713 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
714
715 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
716 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
717 const std::vector<TMVA::Event *> eventCollectionTesting{allData.begin() + nTrainingSamples, allData.end()};
718
719 for (auto &event : eventCollectionTraining) {
720 const std::vector<Float_t>& values = event->GetValues();
721 if (fAnalysisType == Types::kClassification) {
722 double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
723 trainPattern.push_back(Pattern (values.begin(),
724 values.end(),
725 outputValue,
726 event->GetWeight()));
727 trainPattern.back().addInput(1.0);
728 } else if (fAnalysisType == Types::kMulticlass) {
729 std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
730 oneHot[event->GetClass()] = 1.0;
731 trainPattern.push_back(Pattern (values.begin(), values.end(),
732 oneHot.cbegin(), oneHot.cend(),
733 event->GetWeight()));
734 trainPattern.back().addInput(1.0);
735 } else {
736 const std::vector<Float_t>& targets = event->GetTargets ();
737 trainPattern.push_back(Pattern(values.begin(),
738 values.end(),
739 targets.begin(),
740 targets.end(),
741 event->GetWeight ()));
742 trainPattern.back ().addInput (1.0); // bias node
743 }
744 }
745
746 for (auto &event : eventCollectionTesting) {
747 const std::vector<Float_t>& values = event->GetValues();
748 if (fAnalysisType == Types::kClassification) {
749 double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
750 testPattern.push_back(Pattern (values.begin(),
751 values.end(),
752 outputValue,
753 event->GetWeight()));
754 testPattern.back().addInput(1.0);
755 } else if (fAnalysisType == Types::kMulticlass) {
756 std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
757 oneHot[event->GetClass()] = 1.0;
758 testPattern.push_back(Pattern (values.begin(), values.end(),
759 oneHot.cbegin(), oneHot.cend(),
760 event->GetWeight()));
761 testPattern.back().addInput(1.0);
762 } else {
763 const std::vector<Float_t>& targets = event->GetTargets ();
764 testPattern.push_back(Pattern(values.begin(),
765 values.end(),
766 targets.begin(),
767 targets.end(),
768 event->GetWeight ()));
769 testPattern.back ().addInput (1.0); // bias node
770 }
771 }
772
774 std::vector<double> weights;
775
776 net.SetIpythonInteractive(fInteractive, &fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);
777
778 net.setInputSize(fNet.GetInputWidth() + 1);
779 net.setOutputSize(fNet.GetOutputWidth() + 1);
780
781 for (size_t i = 0; i < fNet.GetDepth(); i++) {
782 EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();
783 EnumFunction g = EnumFunction::LINEAR;
784 switch(f) {
785 case EActivationFunction::kIdentity: g = EnumFunction::LINEAR; break;
786 case EActivationFunction::kRelu: g = EnumFunction::RELU; break;
787 case EActivationFunction::kSigmoid: g = EnumFunction::SIGMOID; break;
788 case EActivationFunction::kTanh: g = EnumFunction::TANH; break;
789 case EActivationFunction::kFastTanh: g = EnumFunction::TANH; break;
790 case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
791 case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
792 case EActivationFunction::kGauss: g = EnumFunction::GAUSS; break;
793 }
794 if (i < fNet.GetDepth() - 1) {
795 net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
796 } else {
797 ModeOutputValues h = ModeOutputValues::DIRECT;
798 switch(fOutputFunction) {
799 case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT; break;
800 case EOutputFunction::kSigmoid: h = ModeOutputValues::SIGMOID; break;
801 case EOutputFunction::kSoftmax: h = ModeOutputValues::SOFTMAX; break;
802 }
803 net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
804 }
805 }
806
807 switch(fNet.GetLossFunction()) {
808 case ELossFunction::kMeanSquaredError:
809 net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
810 break;
812 net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
813 break;
814 case ELossFunction::kSoftmaxCrossEntropy:
815 net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
816 break;
817 }
818
819 switch(fWeightInitialization) {
821 net.initializeWeights(WeightInitializationStrategy::XAVIER,
822 std::back_inserter(weights));
823 break;
825 net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
826 std::back_inserter(weights));
827 break;
828 default:
829 net.initializeWeights(WeightInitializationStrategy::XAVIER,
830 std::back_inserter(weights));
831 break;
832 }
833
834 int idxSetting = 0;
835 for (auto s : fTrainingSettings) {
836
838 switch(s.regularization) {
840 case ERegularization::kL1: r = EnumRegularization::L1; break;
841 case ERegularization::kL2: r = EnumRegularization::L2; break;
842 }
843
844 Settings * settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
845 s.testInterval, s.weightDecay, r,
846 MinimizerType::fSteepest, s.learningRate,
847 s.momentum, 1, s.multithreading);
848 std::shared_ptr<Settings> ptrSettings(settings);
849 ptrSettings->setMonitoring (0);
850 Log() << kINFO
851 << "Training with learning rate = " << ptrSettings->learningRate ()
852 << ", momentum = " << ptrSettings->momentum ()
853 << ", repetitions = " << ptrSettings->repetitions ()
854 << Endl;
855
856 ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
857 (idxSetting+1)*100.0/(fSettings.size ()));
858
859 const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
860 if (!dropConfig.empty ()) {
861 Log () << kINFO << "Drop configuration" << Endl
862 << " drop repetitions = " << ptrSettings->dropRepetitions()
863 << Endl;
864 }
865
866 int idx = 0;
867 for (auto f : dropConfig) {
868 Log () << kINFO << " Layer " << idx << " = " << f << Endl;
869 ++idx;
870 }
871 Log () << kINFO << Endl;
872
873 DNN::Steepest minimizer(ptrSettings->learningRate(),
874 ptrSettings->momentum(),
875 ptrSettings->repetitions());
876 net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
877 ptrSettings.reset();
878 Log () << kINFO << Endl;
879 idxSetting++;
880 }
881 size_t weightIndex = 0;
882 for (size_t l = 0; l < fNet.GetDepth(); l++) {
883 auto & layerWeights = fNet.GetLayer(l).GetWeights();
884 for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
885 for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
886 layerWeights(i,j) = weights[weightIndex];
887 weightIndex++;
888 }
889 }
890 auto & layerBiases = fNet.GetLayer(l).GetBiases();
891 if (l == 0) {
892 for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
893 layerBiases(i,0) = weights[weightIndex];
894 weightIndex++;
895 }
896 } else {
897 for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
898 layerBiases(i,0) = 0.0;
899 }
900 }
901 }
902 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
903 ExitFromTraining();
904}
905
906////////////////////////////////////////////////////////////////////////////////
907
909{
910
911#ifdef DNNCUDA // Included only if DNNCUDA flag is set.
912 Log() << kINFO << "Start of neural network training on GPU." << Endl << Endl;
913
914 size_t nValidationSamples = GetNumValidationSamples();
915 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
916 size_t nTestSamples = nValidationSamples;
917
918 Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
919 Log() << kDEBUG << "Using " << nTestSamples << " training samples." << Endl;
920
921 size_t trainingPhase = 1;
922 fNet.Initialize(fWeightInitialization);
923 for (TTrainingSettings & settings : fTrainingSettings) {
924
925 if (fInteractive){
926 fInteractive->ClearGraphs();
927 }
928
929 TNet<TCuda<>> net(settings.batchSize, fNet);
930 net.SetWeightDecay(settings.weightDecay);
931 net.SetRegularization(settings.regularization);
932
933 // Need to convert dropoutprobabilities to conventions used
934 // by backend implementation.
935 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
936 for (auto & p : dropoutVector) {
937 p = 1.0 - p;
938 }
939 net.SetDropoutProbabilities(dropoutVector);
940
941 net.InitializeGradients();
942 auto testNet = net.CreateClone(settings.batchSize);
943
944 Log() << kINFO << "Training phase " << trainingPhase << " of "
945 << fTrainingSettings.size() << ":" << Endl;
946 trainingPhase++;
947
948 using DataLoader_t = TDataLoader<TMVAInput_t, TCuda<>>;
949
950 // Split training data into training and validation set
951 const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
952 const std::vector<Event *> trainingInputData =
953 std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
954 const std::vector<Event *> testInputData =
955 std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
956
957 if (trainingInputData.size() != nTrainingSamples) {
958 Log() << kFATAL << "Inconsistent training sample size" << Endl;
959 }
960 if (testInputData.size() != nTestSamples) {
961 Log() << kFATAL << "Inconsistent test sample size" << Endl;
962 }
963
964 size_t nThreads = 1;
965 TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
966 TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
967 DataLoader_t trainingData(trainingTuple, nTrainingSamples,
968 net.GetBatchSize(), net.GetInputWidth(),
969 net.GetOutputWidth(), nThreads);
970 DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
971 net.GetInputWidth(), net.GetOutputWidth(),
972 nThreads);
973 DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
974 settings.convergenceSteps,
975 settings.testInterval);
976
977 std::vector<TNet<TCuda<>>> nets{};
978 std::vector<TBatch<TCuda<>>> batches{};
979 nets.reserve(nThreads);
980 for (size_t i = 0; i < nThreads; i++) {
981 nets.push_back(net);
982 for (size_t j = 0; j < net.GetDepth(); j++)
983 {
984 auto &masterLayer = net.GetLayer(j);
985 auto &layer = nets.back().GetLayer(j);
986 TCuda<>::Copy(layer.GetWeights(),
987 masterLayer.GetWeights());
988 TCuda<>::Copy(layer.GetBiases(),
989 masterLayer.GetBiases());
990 }
991 }
992
993 bool converged = false;
994 size_t stepCount = 0;
995 size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
996
997 std::chrono::time_point<std::chrono::system_clock> start, end;
998 start = std::chrono::system_clock::now();
999
1000 if (!fInteractive) {
1001 Log() << std::setw(10) << "Epoch" << " | "
1002 << std::setw(12) << "Train Err."
1003 << std::setw(12) << "Test Err."
1004 << std::setw(12) << "GFLOP/s"
1005 << std::setw(12) << "Conv. Steps" << Endl;
1006 std::string separator(62, '-');
1007 Log() << separator << Endl;
1008 }
1009
1010 while (!converged)
1011 {
1012 stepCount++;
1013
1014 // Perform minimization steps for a full epoch.
1015 trainingData.Shuffle();
1016 for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1017 batches.clear();
1018 for (size_t j = 0; j < nThreads; j++) {
1019 batches.reserve(nThreads);
1020 batches.push_back(trainingData.GetBatch());
1021 }
1022 if (settings.momentum > 0.0) {
1023 minimizer.StepMomentum(net, nets, batches, settings.momentum);
1024 } else {
1025 minimizer.Step(net, nets, batches);
1026 }
1027 }
1028
1029 if ((stepCount % minimizer.GetTestInterval()) == 0) {
1030
1031 // Compute test error.
1032 Double_t testError = 0.0;
1033 for (auto batch : testData) {
1034 auto inputMatrix = batch.GetInput();
1035 auto outputMatrix = batch.GetOutput();
1036 testError += testNet.Loss(inputMatrix, outputMatrix);
1037 }
1038 testError /= (Double_t) (nTestSamples / settings.batchSize);
1039
1040 //Log the loss value
1041 fTrainHistory.AddValue("testError",stepCount,testError);
1042
1043 end = std::chrono::system_clock::now();
1044
1045 // Compute training error.
1046 Double_t trainingError = 0.0;
1047 for (auto batch : trainingData) {
1048 auto inputMatrix = batch.GetInput();
1049 auto outputMatrix = batch.GetOutput();
1050 trainingError += net.Loss(inputMatrix, outputMatrix);
1051 }
1052 trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1053 //Log the loss value
1054 fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1055
1056 // Compute numerical throughput.
1057 std::chrono::duration<double> elapsed_seconds = end - start;
1058 double seconds = elapsed_seconds.count();
1059 double nFlops = (double) (settings.testInterval * batchesInEpoch);
1060 nFlops *= net.GetNFlops() * 1e-9;
1061
1062 converged = minimizer.HasConverged(testError);
1063 start = std::chrono::system_clock::now();
1064
1065 if (fInteractive) {
1066 fInteractive->AddPoint(stepCount, trainingError, testError);
1067 fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1068 / minimizer.GetConvergenceSteps ();
1069 if (fExitFromTraining) break;
1070 } else {
1071 Log() << std::setw(10) << stepCount << " | "
1072 << std::setw(12) << trainingError
1073 << std::setw(12) << testError
1074 << std::setw(12) << nFlops / seconds
1075 << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1076 if (converged) {
1077 Log() << Endl;
1078 }
1079 }
1080 }
1081 }
1082 for (size_t l = 0; l < net.GetDepth(); l++) {
1083 fNet.GetLayer(l).GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1084 fNet.GetLayer(l).GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1085 }
1086 }
1087
1088#else // DNNCUDA flag not set.
1089
1090 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1091 "you have CUDA installed and it was successfully "
1092 "detected by CMAKE." << Endl;
1093#endif // DNNCUDA
1094}
1095
1096////////////////////////////////////////////////////////////////////////////////
1097
1099{
1100
1101#ifdef DNNCPU // Included only if DNNCPU flag is set.
1102 Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;
1103
1104 size_t nValidationSamples = GetNumValidationSamples();
1105 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1106 size_t nTestSamples = nValidationSamples;
1107
1108 Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
1109 Log() << kDEBUG << "Using " << nTestSamples << " training samples." << Endl;
1110
1111 fNet.Initialize(fWeightInitialization);
1112
1113 size_t trainingPhase = 1;
1114 for (TTrainingSettings & settings : fTrainingSettings) {
1115
1116 if (fInteractive){
1117 fInteractive->ClearGraphs();
1118 }
1119
1120 Log() << "Training phase " << trainingPhase << " of "
1121 << fTrainingSettings.size() << ":" << Endl;
1122 trainingPhase++;
1123
1124 TNet<TCpu<>> net(settings.batchSize, fNet);
1125 net.SetWeightDecay(settings.weightDecay);
1126 net.SetRegularization(settings.regularization);
1127 // Need to convert dropoutprobabilities to conventions used
1128 // by backend implementation.
1129 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1130 for (auto & p : dropoutVector) {
1131 p = 1.0 - p;
1132 }
1133 net.SetDropoutProbabilities(dropoutVector);
1134 net.InitializeGradients();
1135 auto testNet = net.CreateClone(settings.batchSize);
1136
1137 using DataLoader_t = TDataLoader<TMVAInput_t, TCpu<>>;
1138
1139 // Split training data into training and validation set
1140 const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
1141 const std::vector<Event *> trainingInputData =
1142 std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
1143 const std::vector<Event *> testInputData =
1144 std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
1145
1146 if (trainingInputData.size() != nTrainingSamples) {
1147 Log() << kFATAL << "Inconsistent training sample size" << Endl;
1148 }
1149 if (testInputData.size() != nTestSamples) {
1150 Log() << kFATAL << "Inconsistent test sample size" << Endl;
1151 }
1152
1153 size_t nThreads = 1;
1154 TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
1155 TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
1156 DataLoader_t trainingData(trainingTuple, nTrainingSamples,
1157 net.GetBatchSize(), net.GetInputWidth(),
1158 net.GetOutputWidth(), nThreads);
1159 DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
1160 net.GetInputWidth(), net.GetOutputWidth(),
1161 nThreads);
1162 DNN::TGradientDescent<TCpu<>> minimizer(settings.learningRate,
1163 settings.convergenceSteps,
1164 settings.testInterval);
1165
1166 std::vector<TNet<TCpu<>>> nets{};
1167 std::vector<TBatch<TCpu<>>> batches{};
1168 nets.reserve(nThreads);
1169 for (size_t i = 0; i < nThreads; i++) {
1170 nets.push_back(net);
1171 for (size_t j = 0; j < net.GetDepth(); j++)
1172 {
1173 auto &masterLayer = net.GetLayer(j);
1174 auto &layer = nets.back().GetLayer(j);
1175 TCpu<>::Copy(layer.GetWeights(),
1176 masterLayer.GetWeights());
1177 TCpu<>::Copy(layer.GetBiases(),
1178 masterLayer.GetBiases());
1179 }
1180 }
1181
1182 bool converged = false;
1183 size_t stepCount = 0;
1184 size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
1185
1186 std::chrono::time_point<std::chrono::system_clock> start, end;
1187 start = std::chrono::system_clock::now();
1188
1189 if (!fInteractive) {
1190 Log() << std::setw(10) << "Epoch" << " | "
1191 << std::setw(12) << "Train Err."
1192 << std::setw(12) << "Test Err."
1193 << std::setw(12) << "GFLOP/s"
1194 << std::setw(12) << "Conv. Steps" << Endl;
1195 std::string separator(62, '-');
1196 Log() << separator << Endl;
1197 }
1198
1199 while (!converged)
1200 {
1201 stepCount++;
1202 // Perform minimization steps for a full epoch.
1203 trainingData.Shuffle();
1204 for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1205 batches.clear();
1206 for (size_t j = 0; j < nThreads; j++) {
1207 batches.reserve(nThreads);
1208 batches.push_back(trainingData.GetBatch());
1209 }
1210 if (settings.momentum > 0.0) {
1211 minimizer.StepMomentum(net, nets, batches, settings.momentum);
1212 } else {
1213 minimizer.Step(net, nets, batches);
1214 }
1215 }
1216
1217 if ((stepCount % minimizer.GetTestInterval()) == 0) {
1218
1219 // Compute test error.
1220 Double_t testError = 0.0;
1221 for (auto batch : testData) {
1222 auto inputMatrix = batch.GetInput();
1223 auto outputMatrix = batch.GetOutput();
1224 auto weightMatrix = batch.GetWeights();
1225 testError += testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
1226 }
1227 testError /= (Double_t) (nTestSamples / settings.batchSize);
1228
1229 //Log the loss value
1230 fTrainHistory.AddValue("testError",stepCount,testError);
1231
1232 end = std::chrono::system_clock::now();
1233
1234 // Compute training error.
1235 Double_t trainingError = 0.0;
1236 for (auto batch : trainingData) {
1237 auto inputMatrix = batch.GetInput();
1238 auto outputMatrix = batch.GetOutput();
1239 auto weightMatrix = batch.GetWeights();
1240 trainingError += net.Loss(inputMatrix, outputMatrix, weightMatrix);
1241 }
1242 trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1243
1244 //Log the loss value
1245 fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1246
1247 if (fInteractive){
1248 fInteractive->AddPoint(stepCount, trainingError, testError);
1249 fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
1250 if (fExitFromTraining) break;
1251 }
1252
1253 // Compute numerical throughput.
1254 std::chrono::duration<double> elapsed_seconds = end - start;
1255 double seconds = elapsed_seconds.count();
1256 double nFlops = (double) (settings.testInterval * batchesInEpoch);
1257 nFlops *= net.GetNFlops() * 1e-9;
1258
1259 converged = minimizer.HasConverged(testError);
1260 start = std::chrono::system_clock::now();
1261
1262 if (fInteractive) {
1263 fInteractive->AddPoint(stepCount, trainingError, testError);
1264 fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1265 / minimizer.GetConvergenceSteps ();
1266 if (fExitFromTraining) break;
1267 } else {
1268 Log() << std::setw(10) << stepCount << " | "
1269 << std::setw(12) << trainingError
1270 << std::setw(12) << testError
1271 << std::setw(12) << nFlops / seconds
1272 << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1273 if (converged) {
1274 Log() << Endl;
1275 }
1276 }
1277 }
1278 }
1279
1280
1281 for (size_t l = 0; l < net.GetDepth(); l++) {
1282 auto & layer = fNet.GetLayer(l);
1283 layer.GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1284 layer.GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1285 }
1286 }
1287
1288#else // DNNCPU flag not set.
1289 Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1290 "you have a BLAS implementation and it was successfully "
1291 "detected by CMake as well that the imt CMake flag is set." << Endl;
1292#endif // DNNCPU
1293}
1294
1295////////////////////////////////////////////////////////////////////////////////
1296
1298{
1299 size_t nVariables = GetEvent()->GetNVariables();
1300 Matrix_t X(1, nVariables);
1301 Matrix_t YHat(1, 1);
1302
1303 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1304 for (size_t i = 0; i < nVariables; i++) {
1305 X(0,i) = inputValues[i];
1306 }
1307
1308 fNet.Prediction(YHat, X, fOutputFunction);
1309 return YHat(0,0);
1310}
1311
1312////////////////////////////////////////////////////////////////////////////////
1313
1314const std::vector<Float_t> & TMVA::MethodDNN::GetRegressionValues()
1315{
1316 size_t nVariables = GetEvent()->GetNVariables();
1317 Matrix_t X(1, nVariables);
1318
1319 const Event *ev = GetEvent();
1320 const std::vector<Float_t>& inputValues = ev->GetValues();
1321 for (size_t i = 0; i < nVariables; i++) {
1322 X(0,i) = inputValues[i];
1323 }
1324
1325 size_t nTargets = std::max(1u, ev->GetNTargets());
1326 Matrix_t YHat(1, nTargets);
1327 std::vector<Float_t> output(nTargets);
1328 auto net = fNet.CreateClone(1);
1329 net.Prediction(YHat, X, fOutputFunction);
1330
1331 for (size_t i = 0; i < nTargets; i++)
1332 output[i] = YHat(0, i);
1333
1334 if (fRegressionReturnVal == NULL) {
1335 fRegressionReturnVal = new std::vector<Float_t>();
1336 }
1337 fRegressionReturnVal->clear();
1338
1339 Event * evT = new Event(*ev);
1340 for (size_t i = 0; i < nTargets; ++i) {
1341 evT->SetTarget(i, output[i]);
1342 }
1343
1344 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1345 for (size_t i = 0; i < nTargets; ++i) {
1346 fRegressionReturnVal->push_back(evT2->GetTarget(i));
1347 }
1348 delete evT;
1349 return *fRegressionReturnVal;
1350}
1351
1352const std::vector<Float_t> & TMVA::MethodDNN::GetMulticlassValues()
1353{
1354 size_t nVariables = GetEvent()->GetNVariables();
1355 Matrix_t X(1, nVariables);
1356 Matrix_t YHat(1, DataInfo().GetNClasses());
1357 if (fMulticlassReturnVal == NULL) {
1358 fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1359 }
1360
1361 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1362 for (size_t i = 0; i < nVariables; i++) {
1363 X(0,i) = inputValues[i];
1364 }
1365
1366 fNet.Prediction(YHat, X, fOutputFunction);
1367 for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1368 (*fMulticlassReturnVal)[i] = YHat(0, i);
1369 }
1370 return *fMulticlassReturnVal;
1371}
1372
1373////////////////////////////////////////////////////////////////////////////////
1374
1375void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const
1376{
1377 void* nn = gTools().xmlengine().NewChild(parent, 0, "Weights");
1378 Int_t inputWidth = fNet.GetInputWidth();
1379 Int_t depth = fNet.GetDepth();
1380 char lossFunction = static_cast<char>(fNet.GetLossFunction());
1381 gTools().xmlengine().NewAttr(nn, 0, "InputWidth",
1382 gTools().StringFromInt(inputWidth));
1383 gTools().xmlengine().NewAttr(nn, 0, "Depth", gTools().StringFromInt(depth));
1384 gTools().xmlengine().NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1385 gTools().xmlengine().NewAttr(nn, 0, "OutputFunction",
1386 TString(static_cast<char>(fOutputFunction)));
1387
1388 for (Int_t i = 0; i < depth; i++) {
1389 const auto& layer = fNet.GetLayer(i);
1390 auto layerxml = gTools().xmlengine().NewChild(nn, 0, "Layer");
1391 int activationFunction = static_cast<int>(layer.GetActivationFunction());
1392 gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
1393 TString::Itoa(activationFunction, 10));
1394 WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
1395 WriteMatrixXML(layerxml, "Biases", layer.GetBiases());
1396 }
1397}
1398
1399////////////////////////////////////////////////////////////////////////////////
1400
1402{
1403 auto netXML = gTools().GetChild(rootXML, "Weights");
1404 if (!netXML){
1405 netXML = rootXML;
1406 }
1407
1408 fNet.Clear();
1409 fNet.SetBatchSize(1);
1410
1411 size_t inputWidth, depth;
1412 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1413 gTools().ReadAttr(netXML, "Depth", depth);
1414 char lossFunctionChar;
1415 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1416 char outputFunctionChar;
1417 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1418
1419 fNet.SetInputWidth(inputWidth);
1420 fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
1421 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1422
1423 size_t previousWidth = inputWidth;
1424 auto layerXML = gTools().xmlengine().GetChild(netXML, "Layer");
1425 for (size_t i = 0; i < depth; i++) {
1426 TString fString;
1428
1429 // Read activation function.
1430 gTools().ReadAttr(layerXML, "ActivationFunction", fString);
1431 f = static_cast<EActivationFunction>(fString.Atoi());
1432
1433 // Read number of neurons.
1434 size_t width;
1435 auto matrixXML = gTools().GetChild(layerXML, "Weights");
1436 gTools().ReadAttr(matrixXML, "rows", width);
1437
1438 fNet.AddLayer(width, f);
1439 TMatrixT<Double_t> weights(width, previousWidth);
1440 TMatrixT<Double_t> biases(width, 1);
1441 ReadMatrixXML(layerXML, "Weights", weights);
1442 ReadMatrixXML(layerXML, "Biases", biases);
1443 fNet.GetLayer(i).GetWeights() = weights;
1444 fNet.GetLayer(i).GetBiases() = biases;
1445
1446 layerXML = gTools().GetNextChild(layerXML);
1447 previousWidth = width;
1448 }
1449}
1450
1451////////////////////////////////////////////////////////////////////////////////
1452
1453void TMVA::MethodDNN::ReadWeightsFromStream( std::istream & /*istr*/)
1454{
1455}
1456
1457////////////////////////////////////////////////////////////////////////////////
1458
1460{
1461 fRanking = new Ranking( GetName(), "Importance" );
1462 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1463 fRanking->AddRank( Rank( GetInputLabel(ivar), 1.0));
1464 }
1465 return fRanking;
1466}
1467
1468////////////////////////////////////////////////////////////////////////////////
1469
1470void TMVA::MethodDNN::MakeClassSpecific( std::ostream& /*fout*/,
1471 const TString& /*className*/ ) const
1472{
1473}
1474
1475////////////////////////////////////////////////////////////////////////////////
1476
1478{
// Writes the help text of this method to the logger: a short description,
// followed by a long reference of the performance options and of the
// "Layout", "ErrorStrategy", "WeightInitialization" and "TrainingStrategy"
// option strings.
1479 // get help message text
1480 //
1481 // typical length of text line:
1482 // "|--------------------------------------------------------------|"
// The colour escape strings are left empty when the options reference is
// being written, so that no colour codes end up in that output.
1483 TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1484 TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1485
1486 Log() << Endl;
1487 Log() << col << "--- Short description:" << colres << Endl;
1488 Log() << Endl;
1489 Log() << "The DNN neural network is a feedforward" << Endl;
1490 Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
1491 Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
1492 Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1493 Log() << "signal and one background, regression or multiclass). " << Endl;
1494 Log() << Endl;
1495 Log() << col << "--- Performance optimisation:" << colres << Endl;
1496 Log() << Endl;
1497
// The option reference is a single string literal: every physical line ends
// with a backslash so that the literal continues on the next line, and the
// explicit "\n" sequences provide the line breaks of the printed text.
1498 const char* txt = "The DNN supports various options to improve performance in terms of training speed and \n \
1499reduction of overfitting: \n \
1500\n \
1501 - different training settings can be stacked. Such that the initial training \n\
1502 is done with a large learning rate and a large drop out fraction whilst \n \
1503 in a later stage learning rate and drop out can be reduced. \n \
1504 - drop out \n \
1505 [recommended: \n \
1506 initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
1507 later training stage: 0.1 or 0.0 for all layers \n \
1508 final training stage: 0.0] \n \
1509 Drop out is a technique where a at each training cycle a fraction of arbitrary \n \
1510 nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
1511 - L1 and L2 regularization are available \n \
1512 - Minibatches \n \
1513 [recommended 10 - 150] \n \
1514 Arbitrary mini-batch sizes can be chosen. \n \
1515 - Multithreading \n \
1516 [recommended: True] \n \
1517 Multithreading can be turned on. The minibatches are distributed to the available \n \
1518 cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
1519 \n \
1520 Options: \n \
1521 \"Layout\": \n \
1522 - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
1523 - meaning: \n \
1524 . two hidden layers (separated by \",\") \n \
1525 . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
1526 . the activation function for the output layer is LINEAR \n \
1527 . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
1528 . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
1529 . the number of nodes in the output layer is determined by the number of output nodes \n \
1530 and can therefore not be chosen freely. \n \
1531 \n \
1532 \"ErrorStrategy\": \n \
1533 - SUMOFSQUARES \n \
1534 The error of the neural net is determined by a sum-of-squares error function \n \
1535 For regression, this is the only possible choice. \n \
1536 - CROSSENTROPY \n \
1537 The error of the neural net is determined by a cross entropy function. The \n \
1538 output values are automatically (internally) transformed into probabilities \n \
1539 using a sigmoid function. \n \
1540 For signal/background classification this is the default choice. \n \
1541 For multiclass using cross entropy more than one or no output classes \n \
1542 can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n \
1543 A and C is true, Event 2: C is true, ...) \n \
1544 - MUTUALEXCLUSIVE \n \
1545 In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
1546 \n \
1547 \"WeightInitialization\" \n \
1548 - XAVIER \n \
1549 [recommended] \n \
1550 \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
1551 such that the variance of the values of the nodes is preserved for each layer. \n \
1552 - XAVIERUNIFORM \n \
1553 The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
1554 - LAYERSIZE \n \
1555 Random values scaled by the layer size \n \
1556 \n \
1557 \"TrainingStrategy\" \n \
1558 - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFraction=0.0,DropRepetitions=5\" \n \
1559 - explanation: two stacked training settings separated by \"|\" \n \
1560 . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1561 . second training setting : \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFractions=0.0,DropRepetitions=5\" \n \
1562 . LearningRate : \n \
1563 - recommended for classification: 0.1 initially, 1e-4 later \n \
1564 - recommended for regression: 1e-4 and less \n \
1565 . Momentum : \n \
1566 preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
1567 . Repetitions : \n \
1568 train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
1569 . ConvergenceSteps : \n \
1570 Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n \
1571 of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n \
1572 cycle the test samples are evaluated and thus the convergence is checked) \n \
1573 . BatchSize \n \
1574 Size of the mini-batches. \n \
1575 . TestRepetitions \n \
1576 Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n \
1577 . WeightDecay \n \
1578 If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
1579 . Renormalize \n \
1580 NONE, L1 (|w|) or L2 (w^2) \n \
1581 . DropConfig \n \
1582 Drop a fraction of arbitrary nodes of each of the layers according to the values given \n \
1583 in the DropConfig. \n \
1584 [example: DropConfig=0.0+0.5+0.3 \n \
1585 meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
1586 in layer 2 \n \
1587 recommended: leave all the nodes turned on for the input layer (layer 0) \n \
1588 turn off half of the nodes in later layers for the initial training; leave all nodes \n \
1589 turned on (0.0) in later training stages] \n \
1590 . DropRepetitions \n \
1591 Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n \
1592 [recommended : 1] \n \
1593 . Multithreading \n \
1594 turn on multithreading [recommended: True] \n \
1595 \n";
1596 Log () << txt << Endl;
1597}
1598
1599} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
double
Definition: Converters.cxx:939
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define g(i)
Definition: RSha256.hxx:105
#define h(i)
Definition: RSha256.hxx:106
#define e(i)
Definition: RSha256.hxx:103
#define NONE
Definition: Rotated.cxx:52
int Int_t
Definition: RtypesCore.h:45
unsigned int UInt_t
Definition: RtypesCore.h:46
const Bool_t kFALSE
Definition: RtypesCore.h:101
bool Bool_t
Definition: RtypesCore.h:63
double Double_t
Definition: RtypesCore.h:59
const Bool_t kTRUE
Definition: RtypesCore.h:100
#define ClassImp(name)
Definition: Rtypes.h:364
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
int type
Definition: TGX11.cxx:121
Definition: Pattern.h:8
The Formula class.
Definition: TFormula.h:87
Bool_t WriteOptionsReference() const
Definition: Config.h:67
Layer defines the layout of a layer.
Definition: NeuralNet.h:673
neural net
Definition: NeuralNet.h:1062
Settings for the training of the neural net.
Definition: NeuralNet.h:730
Steepest Gradient Descent algorithm (SGD)
Definition: NeuralNet.h:334
static void Copy(Matrix_t &B, const Matrix_t &A)
Definition: Arithmetic.hxx:269
static void Copy(Matrix_t &B, const Matrix_t &A)
bool HasConverged()
Increases the minimization step counter by the test error evaluation period and uses the current inte...
Definition: Minimizers.h:668
void Step(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Perform a single optimization step on a given batch.
Definition: Minimizers.h:332
size_t GetTestInterval() const
Definition: Minimizers.h:164
void StepMomentum(Net_t &master, std::vector< Net_t > &nets, std::vector< TBatch< Architecture_t > > &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses momentum.
Definition: Minimizers.h:439
size_t GetConvergenceCount() const
Definition: Minimizers.h:160
size_t GetConvergenceSteps() const
Definition: Minimizers.h:161
Generic neural network class.
Definition: Net.h:49
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:359
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:319
std::vector< Float_t > & GetValues()
Definition: Event.h:94
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
Deep Neural Network Implementation.
Definition: MethodDNN.h:77
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDNN.cxx:1352
UInt_t GetNumValidationSamples()
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDNN.cxx:1401
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDNN.h:87
typename Architecture_t::Matrix_t Matrix_t
Definition: MethodDNN.h:82
void ReadWeightsFromStream(std::istream &i)
Definition: MethodDNN.cxx:1453
LayoutVector_t ParseLayoutString(TString layerSpec)
void MakeClassSpecific(std::ostream &, const TString &) const
Definition: MethodDNN.cxx:1470
MethodDNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
void ProcessOptions()
Definition: MethodDNN.cxx:424
virtual ~MethodDNN()
DNN::EInitialization fWeightInitialization
Definition: MethodDNN.h:112
void DeclareOptions()
virtual Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDNN.cxx:1297
const Ranking * CreateRanking()
Definition: MethodDNN.cxx:1459
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
DNN::EOutputFunction fOutputFunction
Definition: MethodDNN.h:113
void AddWeightsXMLTo(void *parent) const
Definition: MethodDNN.cxx:1375
void GetHelpMessage() const
Definition: MethodDNN.cxx:1477
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDNN.cxx:1314
Ranking for variables in method (implementation)
Definition: Ranking.h:48
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TXMLEngine & xmlengine()
Definition: Tools.h:268
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
EAnalysisType
Definition: Types.h:128
@ kMulticlass
Definition: Types.h:131
@ kClassification
Definition: Types.h:129
@ kRegression
Definition: Types.h:130
@ kTraining
Definition: Types.h:145
@ kDEBUG
Definition: Types.h:58
@ kERROR
Definition: Types.h:62
@ kINFO
Definition: Types.h:60
@ kWARNING
Definition: Types.h:61
@ kFATAL
Definition: Types.h:63
TMatrixT.
Definition: TMatrixT.h:39
An array of TObjects.
Definition: TObjArray.h:37
Collectable string class.
Definition: TObjString.h:28
const TString & GetString() const
Definition: TObjString.h:46
Basic string class.
Definition: TString.h:136
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1946
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1131
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:2012
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1816
const char * Data() const
Definition: TString.h:369
@ kTrailing
Definition: TString.h:267
@ kBoth
Definition: TString.h:267
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1163
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2222
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:615
static TString Itoa(Int_t value, Int_t base)
Converts an Int_t to a TString with respect to the base specified (2-36).
Definition: TString.cxx:2050
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:715
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
Definition: TXMLEngine.cxx:586
double T(double x)
Definition: ChebyshevPol.h:34
static const std::string separator("@@@")
static constexpr double s
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
EnumRegularization
Definition: NeuralNet.h:173
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:238
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:57
@ fSteepest
SGD.
Definition: NeuralNet.h:322
ModeOutputValues
Definition: NeuralNet.h:179
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValue(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDNN.cxx:320
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:760
DNN::ERegularization regularization
Definition: MethodDNN.h:94
std::vector< Double_t > dropoutProbabilities
Definition: MethodDNN.h:98
auto * l
Definition: textangle.C:4
static void output(int code)
Definition: gifencode.c:226