NeuralNet.h
/**
 * @file TMVA/NeuralNet.h
 * @author Peter Speckmayer
 * @version 1.0
 *
 * @section LICENSE
 *
 *
 * @section Neural net implementation
 *
 * An implementation of a neural net for TMVA. This neural net uses multithreading.
 *
 */


//////////////////////////////////////////////////////////////////////////
//                                                                      //
// NeuralNet                                                            //
//                                                                      //
// A neural net implementation                                          //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#ifndef TMVA_NEURAL_NET
#define TMVA_NEURAL_NET
#pragma once

#include <vector>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <functional>
#include <tuple>
#include <cmath>
#include <cassert>
#include <random>
#include <thread>
#include <future>
#include <type_traits>
#include <string>
#include <utility>

#include "Pattern.h"
#include "Monitoring.h"

#include "TApplication.h"
#include "Timer.h"

#include "TH1F.h"
#include "TH2F.h"

#include <fenv.h> // turn on or off exceptions for NaN and other numeric exceptions

namespace TMVA
{

   class IPythonInteractive;

   namespace DNN
   {

      double gaussDouble (double mean, double sigma);
      double uniformDouble (double minValue, double maxValue);
      int randomInt (int maxValue);

      class MeanVariance
      {
      public:
         MeanVariance ()
            : m_n (0)
            , m_sumWeights (0)
            , m_mean (0)
            , m_squared (0)
         {}

         inline void clear ()
         {
            m_n = 0;
            m_sumWeights = 0;
            m_mean = 0;
            m_squared = 0;
         }

         template <typename T>
         inline void add (T value, double weight = 1.0)
         {
            ++m_n; // a value has been added

            if (m_n == 1) // initialization
            {
               m_mean = value;
               m_squared = 0.0;
               m_sumWeights = weight;
               return;
            }

            double tmpWeight = m_sumWeights + weight;
            double Q = value - m_mean;

            double R = Q*weight/tmpWeight;
            m_mean += R;
            m_squared += m_sumWeights*R*Q; // incremental update of the weighted sum of squared deviations

            m_sumWeights = tmpWeight;
         }

         template <typename ITERATOR>
         inline void add (ITERATOR itBegin, ITERATOR itEnd)
         {
            for (ITERATOR it = itBegin; it != itEnd; ++it)
               add (*it);
         }

         inline int count () const { return m_n; }
         inline double weights () const { if (m_n == 0) return 0; return m_sumWeights; }
         inline double mean () const { if (m_n == 0) return 0; return m_mean; }
         inline double var () const
         {
            if (m_n == 0)
               return 0;
            if (m_squared <= 0)
               return 0;
            return (m_squared/m_sumWeights);
         }

         inline double var_corr () const
         {
            if (m_n <= 1)
               return var ();

            return (var ()*m_n/(m_n-1)); // unbiased for small sample sizes
         }

         inline double stdDev_corr () const { return sqrt (var_corr ()); }
         inline double stdDev () const { return sqrt (var ()); }

      private:
         size_t m_n;
         double m_sumWeights;
         double m_mean;
         double m_squared;
      };
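
      // A minimal usage sketch of MeanVariance (illustration only, not part of the
      // original header). The add/var pair implements the standard incremental
      // algorithm for a weighted running mean and variance (West, 1979): for a new
      // (value, weight) pair and accumulated weight W, it updates
      //
      //    W'    = W + weight
      //    mean' = mean + (value - mean) * weight / W'
      //    S'    = S + W * (value - mean) * ((value - mean) * weight / W')
      //
      // so that var() = S / W holds at any point.
      //
      //    MeanVariance mv;
      //    mv.add (1.0, 2.0);       // value 1.0 with weight 2.0
      //    mv.add (3.0);            // default weight 1.0
      //    double m = mv.mean ();   // weighted mean: (2*1 + 1*3) / 3 = 5/3
      //    double v = mv.var ();    // weighted variance: 8/9
      //    double s = mv.stdDev (); // sqrt of the weighted variance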


      enum class EnumFunction
      {
         ZERO = '0',
         LINEAR = 'L',
         TANH = 'T',
         RELU = 'R',
         SYMMRELU = 'r',
         TANHSHIFT = 't',
         SIGMOID = 's',
         SOFTSIGN = 'S',
         GAUSS = 'G',
         GAUSSCOMPLEMENT = 'C'
      };


      enum class EnumRegularization
      {
         NONE, L1, L2, L1MAX
      };


      enum class ModeOutputValues : int
      {
         DIRECT = 0x01,
         SIGMOID = 0x02,
         SOFTMAX = 0x04,
         BATCHNORMALIZATION = 0x08
      };

      inline ModeOutputValues operator| (ModeOutputValues lhs, ModeOutputValues rhs)
      {
         return (ModeOutputValues)((int)lhs | (int)rhs);
      }

      inline ModeOutputValues operator|= (ModeOutputValues& lhs, ModeOutputValues rhs)
      {
         lhs = (ModeOutputValues)((int)lhs | (int)rhs);
         return lhs;
      }

      inline ModeOutputValues operator& (ModeOutputValues lhs, ModeOutputValues rhs)
      {
         return (ModeOutputValues)((int)lhs & (int)rhs);
      }

      inline ModeOutputValues operator&= (ModeOutputValues& lhs, ModeOutputValues rhs)
      {
         lhs = (ModeOutputValues)((int)lhs & (int)rhs);
         return lhs;
      }


      template <typename T>
      bool isFlagSet (T flag, T value)
      {
         return (int)(value & flag) != 0;
      }
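
      // Illustration (not from the original header): ModeOutputValues is a bit-flag
      // enum, so output modes can be combined with the operators above and queried
      // with isFlagSet:
      //
      //    ModeOutputValues mode = ModeOutputValues::SIGMOID | ModeOutputValues::BATCHNORMALIZATION;
      //    bool applySigmoid = isFlagSet (ModeOutputValues::SIGMOID, mode); // true
      //    bool applySoftmax = isFlagSet (ModeOutputValues::SOFTMAX, mode); // false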


      class Net;


      typedef std::vector<char> DropContainer;

      /*! \brief The Batch class encapsulates one mini-batch
       *
       * Holds a const_iterator to the beginning and the end of one batch in a vector of Pattern
       */
      class Batch
      {
      public:
         typedef typename std::vector<Pattern>::const_iterator const_iterator;

         Batch (typename std::vector<Pattern>::const_iterator itBegin, typename std::vector<Pattern>::const_iterator itEnd)
            : m_itBegin (itBegin)
            , m_itEnd (itEnd)
         {}

         const_iterator begin () const { return m_itBegin; }
         const_iterator end () const { return m_itEnd; }

         size_t size () const { return std::distance (begin (), end ()); }

      private:
         const_iterator m_itBegin; ///< iterator denoting the beginning of the batch
         const_iterator m_itEnd;   ///< iterator denoting the end of the batch
      };
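
      // Illustration (not from the original header): a Batch is just a view onto a
      // contiguous range of a std::vector<Pattern>, so mini-batches can be formed
      // without copying any Pattern:
      //
      //    std::vector<Pattern> patterns = /* filled elsewhere */;
      //    const size_t batchSize = 10;
      //    for (size_t i = 0; i + batchSize <= patterns.size (); i += batchSize)
      //    {
      //       Batch batch (patterns.begin () + i, patterns.begin () + i + batchSize);
      //       // hand "batch" to the net / minimizer
      //    }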


      template <typename ItSource, typename ItWeight, typename ItTarget>
      void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd);


      template <typename ItSource, typename ItWeight, typename ItPrev>
      void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd);


      template <typename ItValue, typename ItFunction>
      void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction);

      template <typename ItValue, typename ItFunction, typename ItInverseFunction, typename ItGradient>
      void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, ItGradient itGradient);


      template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
      void update (ItSource itSource, ItSource itSourceEnd,
                   ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
                   ItTargetGradient itTargetGradientBegin,
                   ItGradient itGradient);

      template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
      void update (ItSource itSource, ItSource itSourceEnd,
                   ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
                   ItTargetGradient itTargetGradientBegin,
                   ItGradient itGradient,
                   ItWeight itWeight, double weightDecay);

      // ----- signature of a minimizer -------------
      // class Minimizer
      // {
      // public:
      //
      //     template <typename Function, typename Variables, typename PassThrough>
      //     double operator() (Function& fnc, Variables& vars, PassThrough& passThrough)
      //     {
      //         // auto itVars = begin (vars);
      //         // auto itVarsEnd = end (vars);
      //
      //         std::vector<double> myweights;
      //         std::vector<double> gradients;
      //
      //         double value = fnc (passThrough, myweights);
      //         value = fnc (passThrough, myweights, gradients);
      //         return value;
      //     }
      // };


      ///< list all the minimizer types
      enum MinimizerType
      {
         fSteepest ///< SGD
      };


      /*! \brief Steepest Gradient Descent algorithm (SGD)
       *
       * Implements a steepest gradient descent minimization algorithm
       */
      class Steepest
      {
      public:

         size_t m_repetitions;

         /*! \brief c'tor
          *
          * C'tor
          *
          * \param learningRate denotes the learning rate for the SGD algorithm
          * \param momentum fraction of the velocity which is taken over from the last step
          * \param repetitions re-compute the gradients each "repetitions" steps
          */
         Steepest (double learningRate = 1e-4,
                   double momentum = 0.5,
                   size_t repetitions = 10)
            : m_repetitions (repetitions)
            , m_alpha (learningRate)
            , m_beta (momentum)
         {}

         /*! \brief operator to call the steepest gradient descent algorithm
          *
          * entry point to start the minimization procedure
          *
          * \param fitnessFunction (templated) function which has to be provided. This function is minimized
          * \param weights (templated) a reference to a container of weights. The result of the minimization procedure
          *                is returned via this reference (needs to support std::begin and std::end)
          * \param passThrough (templated) object which can hold any data which the fitness function needs. This object
          *                    is not touched by the minimizer; This object is provided to the fitness function when
          *                    called
          */
         template <typename Function, typename Weights, typename PassThrough>
         double operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough);


         double m_alpha; ///< internal parameter (learningRate)
         double m_beta;  ///< internal parameter (momentum)
         std::vector<double> m_prevGradients; ///< vector remembers the gradients of the previous step

         std::vector<double> m_localWeights;   ///< local weights for reuse in thread.
         std::vector<double> m_localGradients; ///< local gradients for reuse in thread.
      };
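
      // A hedged sketch (not from the original header) of the momentum update a
      // steepest-descent step applies to each weight w_i, with learning rate
      // m_alpha, momentum m_beta, current gradient g_i, and the previous step
      // remembered in m_prevGradients. Up to implementation details it is roughly
      //
      //    prev_i = m_beta * prev_i - m_alpha * g_i
      //    w_i    = w_i + prev_i
      //
      // The fitness function is re-evaluated every m_repetitions steps to obtain
      // fresh gradients, as documented for the c'tor parameter above.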


      template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
      double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);


      template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
      double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);


      template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
      double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);


      template <typename ItWeight>
      double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization);
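
      // For reference (formulas not spelled out in the original header): with
      // outputs y_i, targets t_i and pattern weight w, these error functions
      // compute, up to constant factors and implementation details,
      //
      //    sumOfSquares:        E = w * 1/2 * sum_i (y_i - t_i)^2
      //    crossEntropy:        E = -w * sum_i [ t_i ln y_i + (1 - t_i) ln (1 - y_i) ]
      //    softMaxCrossEntropy: E = -w * sum_i t_i ln y_i   (with the y_i softmax-normalized)
      //
      // weightDecay adds the chosen regularization term to the error, roughly
      //
      //    L1: error + factorWeightDecay * sum_j |w_j|
      //    L2: error + factorWeightDecay * 1/2 * sum_j w_j^2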


      /*! \brief LayerData holds the data of one layer
       *
       * LayerData holds the data of one layer, but not its layout
       *
       *
       */
      class LayerData
      {
      public:
         typedef std::vector<double> container_type;

         typedef container_type::iterator iterator_type;
         typedef container_type::const_iterator const_iterator_type;

         typedef std::vector<std::function<double(double)> > function_container_type;
         typedef function_container_type::iterator function_iterator_type;
         typedef function_container_type::const_iterator const_function_iterator_type;

         typedef DropContainer::const_iterator const_dropout_iterator;

         /*! \brief c'tor of LayerData
          *
          * C'tor of LayerData for the input layer
          *
          * \param itInputBegin iterator to the begin of a vector which holds the values of the nodes of the neural net
          * \param itInputEnd iterator to the end of a vector which holds the values of the nodes of the neural net
          * \param eModeOutput indicates a potential transformation of the output values before further computation
          *                    DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
          *                    output value (to create a probability); SOFTMAX applies a softmax transformation to all
          *                    output values (mutually exclusive probability)
          */
         LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);


         /*! \brief c'tor of LayerData
          *
          * C'tor of LayerData for the input layer
          *
          * \param inputSize input size of this layer
          */
         LayerData (size_t inputSize);

         /*! \brief c'tor of LayerData
          *
          * C'tor of LayerData for all layers which are not the input layer; Used during the training of the DNN
          *
          * \param size size of the layer
          * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
          * \param itGradientBegin indicates the start of the gradients for this layer on the gradient vector
          * \param activationFunction indicates activation functions for this layer
          * \param inverseActivationFunction indicates the inverse activation functions for this layer
          * \param eModeOutput indicates a potential transformation of the output values before further computation
          *                    DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
          *                    output value (to create a probability); SOFTMAX applies a softmax transformation to all
          *                    output values (mutually exclusive probability)
          */
         LayerData (size_t size,
                    const_iterator_type itWeightBegin,
                    iterator_type itGradientBegin,
                    std::shared_ptr<std::function<double(double)>> activationFunction,
                    std::shared_ptr<std::function<double(double)>> inverseActivationFunction,
                    ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);

         /*! \brief c'tor of LayerData
          *
          * C'tor of LayerData for all layers which are not the input layer; Used during the application of the DNN
          *
          * \param size size of the layer
          * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
          * \param activationFunction indicates the activation function for this layer
          * \param eModeOutput indicates a potential transformation of the output values before further computation
          *                    DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
          *                    output value (to create a probability); SOFTMAX applies a softmax transformation to all
          *                    output values (mutually exclusive probability)
          */
         LayerData (size_t size, const_iterator_type itWeightBegin,
                    std::shared_ptr<std::function<double(double)>> activationFunction,
                    ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);

         /*! \brief copy c'tor of LayerData
          *
          *
          */
         LayerData (const LayerData& other)
            : m_size (other.m_size)
            , m_itInputBegin (other.m_itInputBegin)
            , m_itInputEnd (other.m_itInputEnd)
            , m_deltas (other.m_deltas)
            , m_valueGradients (other.m_valueGradients)
            , m_values (other.m_values)
            , m_itDropOut (other.m_itDropOut)
            , m_hasDropOut (other.m_hasDropOut)
            , m_itConstWeightBegin (other.m_itConstWeightBegin)
            , m_itGradientBegin (other.m_itGradientBegin)
            , m_activationFunction (other.m_activationFunction)
            , m_inverseActivationFunction (other.m_inverseActivationFunction)
            , m_isInputLayer (other.m_isInputLayer)
            , m_hasWeights (other.m_hasWeights)
            , m_hasGradients (other.m_hasGradients)
            , m_eModeOutput (other.m_eModeOutput)
         {}

         /*! \brief move c'tor of LayerData
          *
          *
          */
         LayerData (LayerData&& other)
            : m_size (other.m_size)
            , m_itInputBegin (other.m_itInputBegin)
            , m_itInputEnd (other.m_itInputEnd)
            , m_deltas (std::move (other.m_deltas))
            , m_valueGradients (std::move (other.m_valueGradients))
            , m_values (std::move (other.m_values))
            , m_itDropOut (other.m_itDropOut)
            , m_hasDropOut (other.m_hasDropOut)
            , m_itConstWeightBegin (other.m_itConstWeightBegin)
            , m_itGradientBegin (other.m_itGradientBegin)
            , m_activationFunction (std::move (other.m_activationFunction))
            , m_inverseActivationFunction (std::move (other.m_inverseActivationFunction))
            , m_isInputLayer (other.m_isInputLayer)
            , m_hasWeights (other.m_hasWeights)
            , m_hasGradients (other.m_hasGradients)
            , m_eModeOutput (other.m_eModeOutput)
         {}


         /*! \brief change the input iterators
          *
          *
          * \param itInputBegin indicates the start of the input node vector
          * \param itInputEnd indicates the end of the input node vector
          *
          */
         void setInput (const_iterator_type itInputBegin, const_iterator_type itInputEnd)
         {
            m_isInputLayer = true;
            m_itInputBegin = itInputBegin;
            m_itInputEnd = itInputEnd;
         }

         /*! \brief clear the values and the deltas
          *
          *
          */
         void clear ()
         {
            m_values.assign (m_values.size (), 0.0);
            m_deltas.assign (m_deltas.size (), 0.0);
         }

         const_iterator_type valuesBegin () const { return m_isInputLayer ? m_itInputBegin : begin (m_values); } ///< returns const iterator to the begin of the (node) values
         const_iterator_type valuesEnd () const { return m_isInputLayer ? m_itInputEnd : end (m_values); }       ///< returns const iterator to the end of the (node) values

         iterator_type valuesBegin () { assert (!m_isInputLayer); return begin (m_values); } ///< returns iterator to the begin of the (node) values
         iterator_type valuesEnd () { assert (!m_isInputLayer); return end (m_values); }     ///< returns iterator to the end of the (node) values

         ModeOutputValues outputMode () const { return m_eModeOutput; }                ///< returns the output mode
         container_type probabilities () const { return computeProbabilities (); }     ///< computes the probabilities from the current node values and returns them

         iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation)
         iterator_type deltasEnd () { return end (m_deltas); }     ///< returns iterator to the end of the deltas (back-propagation)

         const_iterator_type deltasBegin () const { return begin (m_deltas); } ///< returns const iterator to the begin of the deltas (back-propagation)
         const_iterator_type deltasEnd () const { return end (m_deltas); }     ///< returns const iterator to the end of the deltas (back-propagation)

         iterator_type valueGradientsBegin () { return begin (m_valueGradients); } ///< returns iterator to the begin of the gradients of the node values
         iterator_type valueGradientsEnd () { return end (m_valueGradients); }     ///< returns iterator to the end of the gradients of the node values

         const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const iterator to the begin of the gradients
         const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); }     ///< returns const iterator to the end of the gradients

         iterator_type gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; }             ///< returns iterator to the begin of the gradients
         const_iterator_type gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients
         const_iterator_type weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; }  ///< returns const iterator to the begin of the weights for this layer

         std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; }
         std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; }

         /*! \brief set the drop-out info for this layer
          *
          */
         template <typename Iterator>
         void setDropOut (Iterator itDrop) { m_itDropOut = itDrop; m_hasDropOut = true; }

         /*! \brief clear the drop-out-data for this layer
          *
          *
          */
         void clearDropOut () { m_hasDropOut = false; }

         bool hasDropOut () const { return m_hasDropOut; }                                       ///< has this layer drop-out turned on?
         const_dropout_iterator dropOut () const { assert (m_hasDropOut); return m_itDropOut; }  ///< return the begin of the drop-out information

         size_t size () const { return m_size; } ///< return the size of the layer

      private:

         /*! \brief compute the probabilities from the node values
          *
          *
          */
         container_type computeProbabilities () const;

      private:

         size_t m_size; ///< layer size

         const_iterator_type m_itInputBegin; ///< iterator to the first of the nodes in the input node vector
         const_iterator_type m_itInputEnd;   ///< iterator to the end of the nodes in the input node vector

         std::vector<double> m_deltas;         ///< stores the deltas for the DNN training
         std::vector<double> m_valueGradients; ///< stores the gradients of the values (nodes)
         std::vector<double> m_values;         ///< stores the values of the nodes in this layer
         const_dropout_iterator m_itDropOut;   ///< iterator to a container indicating if the corresponding node is to be dropped
         bool m_hasDropOut;                    ///< is drop-out turned on?

         const_iterator_type m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector
         iterator_type m_itGradientBegin;          ///< iterator to the first gradient of this layer in the gradient vector

         std::shared_ptr<std::function<double(double)>> m_activationFunction;        ///< activation function for this layer
         std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< inverse activation function for this layer

         bool m_isInputLayer; ///< is this layer an input layer
         bool m_hasWeights;   ///< does this layer have weights (it does not if it is the input layer)
         bool m_hasGradients; ///< does this layer have gradients (only if in training mode)

         ModeOutputValues m_eModeOutput; ///< stores the output mode (DIRECT, SIGMOID, SOFTMAX)

      };




      /*! \brief Layer defines the layout of a layer
       *
       * Layer defines the layout of a specific layer in the DNN
       * Objects of this class don't hold the layer data itself (see class "LayerData")
       *
       */
      class Layer
      {
      public:

         /*! \brief c'tor for defining a Layer
          *
          *
          */
         Layer (size_t numNodes, EnumFunction activationFunction, ModeOutputValues eModeOutputValues = ModeOutputValues::DIRECT);

         ModeOutputValues modeOutputValues () const { return m_eModeOutputValues; }                        ///< get the mode-output-value (direct, probabilities)
         void modeOutputValues (ModeOutputValues eModeOutputValues) { m_eModeOutputValues = eModeOutputValues; } ///< set the mode-output-value

         size_t numNodes () const { return m_numNodes; }                                           ///< return the number of nodes of this layer
         size_t numWeights (size_t numInputNodes) const { return numInputNodes * numNodes (); }    ///< return the number of weights for this layer (fully connected)

         std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; }               ///< fetch the activation function for this layer
         std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; } ///< fetch the inverse activation function for this layer

         EnumFunction activationFunctionType () const { return m_activationFunctionType; } ///< get the activation function type for this layer

      private:

         std::shared_ptr<std::function<double(double)>> m_activationFunction;        ///< stores the activation function
         std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< stores the inverse activation function

         size_t m_numNodes;

         ModeOutputValues m_eModeOutputValues; ///< do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned as such
         EnumFunction m_activationFunctionType;

         friend class Net;
      };




      template <typename LAYERDATA>
      void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData);


      template <typename LAYERDATA>
      void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData);


      template <typename LAYERDATA>
      void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization);
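
      // A hedged sketch (not part of the original header) of what these three steps
      // do for a fully connected layer with weights W, activation f, previous-layer
      // values x, and current-layer deltas d, up to implementation details:
      //
      //    forward:   value_j  = f ( sum_i W_ji * x_i )        (applyWeights + applyFunctions)
      //    backward:  delta_i  = f'(x_i) * sum_j W_ji * d_j    (applyWeightsBackwards)
      //    update:    grad_ji += d_j * x_i  (plus the regularization term handled by weightDecay)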



      /*! \brief Settings for the training of the neural net
       *
       *
       */
      class Settings
      {
      public:

         /*! \brief c'tor
          *
          *
          */
         Settings (TString name,
                   size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
                   double _factorWeightDecay = 1e-5, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE,
                   MinimizerType _eMinimizerType = MinimizerType::fSteepest,
                   double _learningRate = 1e-5, double _momentum = 0.3,
                   int _repetitions = 3,
                   bool _multithreading = true);

         /*! \brief d'tor
          *
          *
          */
         virtual ~Settings ();


         /*! \brief set the drop-out configuration (layer-wise)
          *
          * \param begin begin of an array or vector denoting the drop-out probabilities for each layer
          * \param end end of an array or vector denoting the drop-out probabilities for each layer
          * \param _dropRepetitions denotes after how many repetitions the drop-out setting (which nodes are dropped out exactly) is changed
          */
         template <typename Iterator>
         void setDropOut (Iterator begin, Iterator end, size_t _dropRepetitions) { m_dropOut.assign (begin, end); m_dropRepetitions = _dropRepetitions; }

         size_t dropRepetitions () const { return m_dropRepetitions; }
         const std::vector<double>& dropFractions () const { return m_dropOut; }

         void setMonitoring (std::shared_ptr<Monitoring> ptrMonitoring) { fMonitoring = ptrMonitoring; } ///< prepared for monitoring

         size_t convergenceSteps () const { return m_convergenceSteps; }   ///< how many steps until training is deemed to have converged
         size_t batchSize () const { return m_batchSize; }                 ///< mini-batch size
         size_t testRepetitions () const { return m_testRepetitions; }     ///< how often is the test data tested
         double factorWeightDecay () const { return m_factorWeightDecay; } ///< get the weight-decay factor

         double learningRate () const { return fLearningRate; }            ///< get the learning rate
         double momentum () const { return fMomentum; }                    ///< get the momentum (e.g. for SGD)
         int repetitions () const { return fRepetitions; }                 ///< how many steps have to be gone until the batch is changed
         MinimizerType minimizerType () const { return fMinimizerType; }   ///< which minimizer shall be used (e.g. SGD)


         virtual void testSample (double /*error*/, double /*output*/, double /*target*/, double /*weight*/) {} ///< virtual function to be used for monitoring (callback)
         virtual void startTrainCycle () ///< callback for monitoring and logging
         {
            m_convergenceCount = 0;
            m_maxConvergenceCount = 0;
            m_minError = 1e10;
         }
         virtual void endTrainCycle (double /*error*/) {} ///< callback for monitoring and logging

         virtual void setProgressLimits (double minProgress = 0, double maxProgress = 100) ///< for monitoring and logging (set the current "progress" limits for the display of the progress)
         {
            m_minProgress = minProgress;
            m_maxProgress = maxProgress;
         }
         virtual void startTraining () ///< start drawing the progress bar
         {
         }
         virtual void cycle (double progress, TString text) ///< advance on the progress bar
         {
         }

         virtual void startTestCycle () {} ///< callback for monitoring and logging
         virtual void endTestCycle () {}   ///< callback for monitoring and logging
         virtual void testIteration () {}  ///< callback for monitoring and logging
         virtual void drawSample (const std::vector<double>& /*input*/, const std::vector<double>& /* output */, const std::vector<double>& /* target */, double /* patternWeight */) {} ///< callback for monitoring and logging

         virtual void computeResult (const Net& /* net */, std::vector<double>& /* weights */) {} ///< callback for monitoring and logging

         virtual bool hasConverged (double testError); ///< has this training converged already?

         EnumRegularization regularization () const { return m_regularization; } ///< is some regularization of the DNN turned on?

         bool useMultithreading () const { return m_useMultithreading; } ///< is multithreading turned on?


         void pads (int numPads) { if (fMonitoring) fMonitoring->pads (numPads); } ///< preparation for monitoring
         void create (std::string histoName, int bins, double min, double max) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max); } ///< for monitoring
         void create (std::string histoName, int bins, double min, double max, int bins2, double min2, double max2) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max, bins2, min2, max2); } ///< for monitoring
         void addPoint (std::string histoName, double x) { if (fMonitoring) fMonitoring->addPoint (histoName, x); } ///< for monitoring
         void addPoint (std::string histoName, double x, double y) { if (fMonitoring) fMonitoring->addPoint (histoName, x, y); } ///< for monitoring
         void plot (std::string histoName, std::string options, int pad, EColor color) { if (fMonitoring) fMonitoring->plot (histoName, options, pad, color); } ///< for monitoring
         void clear (std::string histoName) { if (fMonitoring) fMonitoring->clear (histoName); } ///< for monitoring
         bool exists (std::string histoName) { if (fMonitoring) return fMonitoring->exists (histoName); return false; } ///< for monitoring

         size_t convergenceCount () const { return m_convergenceCount; }       ///< returns the current convergence count
         size_t maxConvergenceCount () const { return m_maxConvergenceCount; } ///< returns the max convergence count so far
         double minError () const { return m_minError; }                       ///< returns the smallest error so far

      public:
         Timer m_timer;        ///< timer for monitoring
         double m_minProgress; ///< current limits for the progress bar
         double m_maxProgress; ///< current limits for the progress bar


         size_t m_convergenceSteps; ///< number of steps without improvement to consider the DNN to have converged
         size_t m_batchSize;        ///< mini-batch size
         size_t m_testRepetitions;
         double m_factorWeightDecay;

         size_t count_E;
         size_t count_dE;
         size_t count_mb_E;
         size_t count_mb_dE;

         EnumRegularization m_regularization;

         size_t m_dropRepetitions;
         std::vector<double> m_dropOut;

         double fLearningRate;
         double fMomentum;
         int fRepetitions;
         MinimizerType fMinimizerType;

         size_t m_convergenceCount;
         size_t m_maxConvergenceCount;
         double m_minError;


      protected:
         bool m_useMultithreading;

         std::shared_ptr<Monitoring> fMonitoring;
      };




      /*! \brief Settings for classification
       *
       * contains additional settings if the DNN problem is classification
       */
      class ClassificationSettings : public Settings
      {
      public:
         /*! \brief c'tor
          *
          *
          */
         ClassificationSettings (TString name,
                                 size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
                                 double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE,
                                 size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest,
                                 double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3,
                                 bool _useMultithreading = true)
            : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay,
                        _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading)
            , m_ams ()
            , m_sumOfSigWeights (0)
            , m_sumOfBkgWeights (0)
            , m_scaleToNumEvents (_scaleToNumEvents)
            , m_cutValue (10.0)
            , m_pResultPatternContainer (nullptr)
            , m_fileNameResult ()
            , m_fileNameNetConfig ()
         {
         }

         /*! \brief d'tor
          *
          *
          */
         virtual ~ClassificationSettings ()
         {
         }

         void startTrainCycle ();
         void endTrainCycle (double /*error*/);
         void testIteration () { if (fMonitoring) fMonitoring->ProcessEvents (); }


         /* void createHistograms () */
         /* { */
         /*     std::cout << "is hist ROC existing?" << std::endl; */
         /*     if (m_histROC) */
         /*     { */
         /*         std::cout << "--> yes" << std::endl; */
         /*         fMonitoring->ProcessEvents (); */
         /*         return; */
         /*     } */

         /*     std::cout << "create histograms" << std::endl; */
         /*     TCanvas* canvas = fMonitoring->GetCanvas (); */
         /*     if (canvas) */
         /*     { */
         /*         std::cout << "canvas divide" << std::endl; */
         /*         canvas->cd (); */
         /*         canvas->Divide (2,2); */
         /*     } */
         /*     if (!m_histROC) */
         /*     { */
         /*         m_histROC = new TH2F ("ROC","ROC", 1000, 0, 1.0, 1000, 0, 1.0); m_histROC->SetDirectory (0); */
         /*         m_histROC->SetLineColor (kBlue); */
         /*     } */
         /*     if (!m_histSignificance) */
         /*     { */
         /*         m_histSignificance = new TH2F ("Significance", "Significance", 1000, 0, 1.0, 5, 0.0, 2.0); */
         /*         m_histSignificance->SetDirectory (0); */
         /*         m_histSignificance->SetBit (TH1::kCanRebin); */
         /*         m_histROC->SetLineColor (kRed); */
         /*     } */
         /*     if (!m_histError) */
         /*     { */
         /*         m_histError = new TH1F ("Error", "Error", 100, 0, 100); */
         /*         m_histError->SetDirectory (0); */
         /*         m_histError->SetBit (TH1::kCanRebin); */
         /*         m_histROC->SetLineColor (kGreen); */
         /*     } */
         /*     if (!m_histOutputSignal) */
         /*     { */
         /*         m_histOutputSignal = new TH1F ("Signal", "Signal", 100, 0, 1.0); */
         /*         m_histOutputSignal->SetDirectory (0); */
         /*         m_histOutputSignal->SetBit (TH1::kCanRebin); */
         /*     } */
         /*     if (!m_histOutputBackground) */
         /*     { */
         /*         m_histOutputBackground = new TH1F ("Background", "Background", 100, 0, 1.0); */
         /*         m_histOutputBackground->SetDirectory (0); */
         /*         m_histOutputBackground->SetBit (TH1::kCanRebin); */
         /*     } */

         /*     fMonitoring->ProcessEvents (); */
         /* } */

         void testSample (double error, double output, double target, double weight);

         virtual void startTestCycle ();
         virtual void endTestCycle ();


         void setWeightSums (double sumOfSigWeights, double sumOfBkgWeights);
         void setResultComputation (std::string _fileNameNetConfig, std::string _fileNameResult, std::vector<Pattern>* _resultPatternContainer);

         std::vector<double> m_input;
         std::vector<double> m_output;
         std::vector<double> m_targets;
         std::vector<double> m_weights;

         std::vector<double> m_ams;
         std::vector<double> m_significances;


         double m_sumOfSigWeights;
         double m_sumOfBkgWeights;
         size_t m_scaleToNumEvents;

         double m_cutValue;
         std::vector<Pattern>* m_pResultPatternContainer;
         std::string m_fileNameResult;
         std::string m_fileNameNetConfig;


         /* TH2F* m_histROC; */
         /* TH2F* m_histSignificance; */

         /* TH1F* m_histError; */
         /* TH1F* m_histOutputSignal; */
         /* TH1F* m_histOutputBackground; */
      };



      ///< used to distinguish between different function signatures
      enum class ModeOutput
      {
         FETCH
      };

      /*! \brief error functions to be chosen from
       *
       *
       */
      enum class ModeErrorFunction
      {
         SUMOFSQUARES = 'S',
         CROSSENTROPY = 'C',
         CROSSENTROPY_MUTUALEXCLUSIVE = 'M'
      };

      /*! \brief weight initialization strategies to be chosen from
       *
       *
       */
      enum class WeightInitializationStrategy
      {
         XAVIER, TEST, LAYERSIZE, XAVIERUNIFORM
      };



      /*! \brief neural net
       *
       * holds the structure of all layers and some data for the whole net
       * does not know the layer data though (i.e. values of the nodes and weights)
       */
      class Net
      {
      public:

         typedef std::vector<double> container_type;
         typedef container_type::iterator iterator_type;
         typedef std::pair<iterator_type,iterator_type> begin_end_type;


         /*! \brief c'tor
          *
          *
          */
         Net ()
            : m_eErrorFunction (ModeErrorFunction::SUMOFSQUARES)
            , m_sizeInput (0)
            , m_layers ()
         {
         }

         /*! \brief copy c'tor
          *
          *
          */
         Net (const Net& other)
            : m_eErrorFunction (other.m_eErrorFunction)
            , m_sizeInput (other.m_sizeInput)
            , m_layers (other.m_layers)
         {
         }

         void setInputSize (size_t sizeInput) { m_sizeInput = sizeInput; }     ///< set the input size of the DNN
         void setOutputSize (size_t sizeOutput) { m_sizeOutput = sizeOutput; } ///< set the output size of the DNN
         void addLayer (Layer& layer) { m_layers.push_back (layer); }          ///< add a layer (layout)
         void addLayer (Layer&& layer) { m_layers.push_back (std::move (layer)); }
         void setErrorFunction (ModeErrorFunction eErrorFunction) { m_eErrorFunction = eErrorFunction; } ///< set the error function to be used

         size_t inputSize () const { return m_sizeInput; }   ///< input size of the DNN
         size_t outputSize () const { return m_sizeOutput; } ///< output size of the DNN

         /*! \brief set the drop-out configuration
          *
          *
          */
         template <typename WeightsType, typename DropProbabilities>
         void dropOutWeightFactor (WeightsType& weights,
                                   const DropProbabilities& drops,
                                   bool inverse = false);

         /*! \brief start the training
          *
          * \param weights weight vector
          * \param trainPattern training pattern
          * \param testPattern test pattern
          * \param minimizer use this minimizer for training (e.g. SGD)
          * \param settings settings used for this training run
          */
         template <typename Minimizer>
         double train (std::vector<double>& weights,
                       std::vector<Pattern>& trainPattern,
                       const std::vector<Pattern>& testPattern,
                       Minimizer& minimizer,
                       Settings& settings);

         /*! \brief pre-training for future use
          *
          *
          */
         template <typename Minimizer>
         void preTrain (std::vector<double>& weights,
                        std::vector<Pattern>& trainPattern,
                        const std::vector<Pattern>& testPattern,
                        Minimizer& minimizer, Settings& settings);


         /*! \brief executes one training cycle
          *
          * \param minimizer the minimizer to be used
          * \param weights the weight vector to be used
          * \param itPatternBegin the pattern to be trained with
          * \param itPatternEnd the pattern to be trained with
          * \param settings the settings for the training
          * \param dropContainer the configuration for DNN drop-out
          */
         template <typename Iterator, typename Minimizer>
         inline double trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                                   Iterator itPatternBegin, Iterator itPatternEnd,
                                   Settings& settings,
                                   DropContainer& dropContainer);

         size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net
         size_t numNodes (size_t trainingStartLayer = 0) const;   ///< returns the number of nodes in this net

         template <typename Weights>
         std::vector<double> compute (const std::vector<double>& input, const Weights& weights) const; ///< compute the net with the given input and the given weights

         template <typename Weights, typename PassThrough>
         double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients

         template <typename Weights, typename PassThrough, typename OutContainer>
         double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function

         template <typename Weights, typename Gradients, typename PassThrough>
         double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well

         template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
         double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const;


         template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
         std::vector<std::vector<LayerData>> prepareLayerData (LayerContainer& layers,
                                                               Batch& batch,
                                                               const DropContainer& dropContainer,
                                                               ItWeight itWeightBegin,
                                                               ItWeight itWeightEnd,
                                                               ItGradient itGradientBegin,
                                                               ItGradient itGradientEnd,
                                                               size_t& totalNumWeights) const;

         template <typename LayerContainer>
         void forwardPattern (const LayerContainer& _layers,
                              std::vector<LayerData>& layerData) const;


         template <typename LayerContainer, typename LayerPatternContainer>
         void forwardBatch (const LayerContainer& _layers,
                            LayerPatternContainer& layerPatternData,
                            std::vector<double>& valuesMean,
                            std::vector<double>& valuesStdDev,
                            size_t trainFromLayer) const;

         template <typename OutputContainer>
         void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const;

         template <typename OutputContainer>
         void fetchOutput (const std::vector<LayerData>& layerPatternData, OutputContainer& outputContainer) const;


         template <typename ItWeight>
         std::tuple</*sumError*/double,/*sumWeights*/double> computeError (const Settings& settings,
                                                                           std::vector<LayerData>& lastLayerData,
                                                                           Batch& batch,
                                                                           ItWeight itWeightBegin,
                                                                           ItWeight itWeightEnd) const;

         template <typename Settings>
         void backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                             const Settings& settings,
                             size_t trainFromLayer,
                             size_t totalNumWeights) const;



         /*! \brief main NN computation function
          *
          *
          */
         template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
         double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch,
                                  ItWeight itWeightBegin, ItWeight itWeightEnd,
                                  ItGradient itGradientBegin, ItGradient itGradientEnd,
                                  size_t trainFromLayer,
                                  OutContainer& outputContainer, bool fetchOutput) const;



         double E ();
         void dE ();


         /*! \brief computes the error of the DNN
          *
          *
          */
         template <typename Container, typename ItWeight>
         double errorFunction (LayerData& layerData,
                               Container truth,
                               ItWeight itWeight,
                               ItWeight itWeightEnd,
                               double patternWeight,
                               double factorWeightDecay,
                               EnumRegularization eRegularization) const;


         const std::vector<Layer>& layers () const { return m_layers; } ///< returns the layers (structure)
         std::vector<Layer>& layers () { return m_layers; }             ///< returns the layers (structure)

         void removeLayer () { m_layers.pop_back (); } ///< remove the last layer


         void clear () ///< clear the net (removes all layers)
         {
            m_layers.clear ();
         }


         template <typename OutIterator>
         void initializeWeights (WeightInitializationStrategy eInitStrategy,
                                 OutIterator itWeight); ///< initialize the weights with the given strategy

      protected:

         void fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const; ///< prepare the drop-out-container (select the nodes which are to be dropped out)


      private:

         ModeErrorFunction m_eErrorFunction; ///< denotes the error function
         size_t m_sizeInput;                 ///< input size of this DNN
         size_t m_sizeOutput;                ///< output size of this DNN
         std::vector<Layer> m_layers;        ///< layer-structure-data

      protected:
         // variables for JsMVA (interactive training in jupyter notebook)
         IPythonInteractive* fInteractive = nullptr;
         bool* fExitFromTraining = nullptr;
         UInt_t *fIPyMaxIter = nullptr, *fIPyCurrentIter = nullptr;

      public:

         // setup ipython interactive variables
         void SetIpythonInteractive (IPythonInteractive* fI, bool* fE, UInt_t* M, UInt_t* C)
         {
            fInteractive = fI;
            fExitFromTraining = fE;
            fIPyMaxIter = M;
            fIPyCurrentIter = C;
         }
      };
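
      // A hedged usage sketch (not part of the original header; it uses only the
      // names declared above, but the exact training setup in real TMVA code may
      // differ; trainPatterns/testPatterns are assumed to be std::vector<Pattern>
      // containers filled elsewhere):
      //
      //    Net net;
      //    net.setInputSize (4);
      //    net.setOutputSize (1);
      //    net.addLayer (Layer (8, EnumFunction::TANH));
      //    net.addLayer (Layer (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID));
      //    net.setErrorFunction (ModeErrorFunction::CROSSENTROPY);
      //
      //    std::vector<double> weights (net.numWeights ());
      //    net.initializeWeights (WeightInitializationStrategy::XAVIER, weights.begin ());
      //
      //    Steepest minimizer (/*learningRate*/ 1e-4, /*momentum*/ 0.5, /*repetitions*/ 10);
      //    ClassificationSettings settings ("sketch");
      //    double testError = net.train (weights, trainPatterns, testPatterns, minimizer, settings);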



      typedef std::tuple<Settings&, Batch&, DropContainer&> pass_through_type;



   } // namespace DNN
} // namespace TMVA


// include the implementations (in header file, because they are templated)
#include "TMVA/NeuralNet.icc"

#endif
double
Definition: Converters.cxx:939
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:45
unsigned int UInt_t
Definition: RtypesCore.h:46
EColor
Definition: Rtypes.h:65
char name[80]
Definition: TGX11.cxx:110
int type
Definition: TGX11.cxx:121
double sqrt(double)
Double_t(* Function)(Double_t)
Definition: Functor.C:4
The Batch class encapsulates one mini-batch.
Definition: NeuralNet.h:235
const_iterator m_itEnd
iterator denoting the end of the batch
Definition: NeuralNet.h:251
const_iterator begin() const
Definition: NeuralNet.h:244
const_iterator end() const
Definition: NeuralNet.h:245
Batch(typename std::vector< Pattern >::const_iterator itBegin, typename std::vector< Pattern >::const_iterator itEnd)
Definition: NeuralNet.h:239
size_t size() const
Definition: NeuralNet.h:247
std::vector< Pattern >::const_iterator const_iterator
Definition: NeuralNet.h:237
const_iterator m_itBegin
iterator denoting the beginning of the batch
Definition: NeuralNet.h:250
Settings for classificationused to distinguish between different function signatures.
Definition: NeuralNet.h:895
void startTrainCycle()
action to be done when the training cycle is started (e.g.
Definition: NeuralNet.cxx:281
void testIteration()
callback for monitoring and loggging
Definition: NeuralNet.h:930
void endTrainCycle(double)
action to be done when the training cycle is ended (e.g.
Definition: NeuralNet.cxx:296
virtual void endTestCycle()
action to be done when the training cycle is ended (e.g.
Definition: NeuralNet.cxx:326
std::vector< Pattern > * m_pResultPatternContainer
Definition: NeuralNet.h:1009
void setResultComputation(std::string _fileNameNetConfig, std::string _fileNameResult, std::vector< Pattern > *_resultPatternContainer)
preparation for monitoring output
Definition: NeuralNet.cxx:523
ClassificationSettings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, EnumRegularization _regularization=EnumRegularization::NONE, size_t _scaleToNumEvents=0, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _useMultithreading=true)
c'tor
Definition: NeuralNet.h:901
std::vector< double > m_input
Definition: NeuralNet.h:995
std::vector< double > m_significances
Definition: NeuralNet.h:1001
std::vector< double > m_weights
Definition: NeuralNet.h:998
virtual ~ClassificationSettings()
d'tor
Definition: NeuralNet.h:924
std::vector< double > m_targets
Definition: NeuralNet.h:997
void testSample(double error, double output, double target, double weight)
action to be done after the computation of a test sample (e.g.
Definition: NeuralNet.cxx:304
virtual void startTestCycle()
action to be done when the test cycle is started (e.g.
Definition: NeuralNet.cxx:316
void setWeightSums(double sumOfSigWeights, double sumOfBkgWeights)
set the weight sums to be scaled to (preparations for monitoring output)
Definition: NeuralNet.cxx:515
std::vector< double > m_ams
Definition: NeuralNet.h:1000
std::vector< double > m_output
Definition: NeuralNet.h:996
LayerData holds the data of one layer.
Definition: NeuralNet.h:437
const_iterator_type m_itInputBegin
iterator to the first of the nodes in the input node vector
Definition: NeuralNet.h:639
const_iterator_type deltasBegin() const
returns const iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:594
iterator_type valuesBegin()
returns iterator to the begin of the (node) values
Definition: NeuralNet.h:585
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition: NeuralNet.h:583
bool m_hasGradients
does this layer have gradients (only if in training mode)
Definition: NeuralNet.h:656
std::vector< double > m_deltas
stores the deltas for the DNN training
Definition: NeuralNet.h:642
container_type::iterator iterator_type
Definition: NeuralNet.h:441
LayerData(const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput=ModeOutputValues::DIRECT)
c'tor of LayerData
Definition: NeuralNet.cxx:81
void setDropOut(Iterator itDrop)
set the drop-out info for this layer
Definition: NeuralNet.h:614
void setInput(const_iterator_type itInputBegin, const_iterator_type itInputEnd)
change the input iterators
Definition: NeuralNet.h:565
std::vector< std::function< double(double)> > function_container_type
Definition: NeuralNet.h:444
iterator_type valuesEnd()
returns iterator to the end of the (node) values
Definition: NeuralNet.h:586
const_dropout_iterator m_itDropOut
iterator to a container indicating if the corresponding node is to be dropped
Definition: NeuralNet.h:645
iterator_type valueGradientsBegin()
returns iterator to the begin of the gradients of the node values
Definition: NeuralNet.h:597
iterator_type gradientsBegin()
returns iterator to the begin of the gradients
Definition: NeuralNet.h:603
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:591
bool m_hasWeights
does this layer have weights (it does not if it is the input layer)
Definition: NeuralNet.h:655
const_dropout_iterator dropOut() const
return the begin of the drop-out information
Definition: NeuralNet.h:623
LayerData(LayerData &&other)
move c'tor of LayerData
Definition: NeuralNet.h:538
std::vector< double > container_type
Definition: NeuralNet.h:439
size_t size() const
return the size of the layer
Definition: NeuralNet.h:625
const_iterator_type weightsBegin() const
returns const iterator to the begin of the weights for this layer
Definition: NeuralNet.h:605
function_container_type::const_iterator const_function_iterator_type
Definition: NeuralNet.h:446
LayerData(const LayerData &other)
copy c'tor of LayerData
Definition: NeuralNet.h:515
function_container_type::iterator function_iterator_type
Definition: NeuralNet.h:445
std::vector< double > m_values
stores the values of the nodes in this layer
Definition: NeuralNet.h:644
const_iterator_type m_itInputEnd
iterator to the end of the nodes in the input node vector
Definition: NeuralNet.h:640
container_type::const_iterator const_iterator_type
Definition: NeuralNet.h:442
ModeOutputValues outputMode() const
returns the output mode
Definition: NeuralNet.h:588
iterator_type m_itGradientBegin
iterator to the first gradient of this layer in the gradient vector
Definition: NeuralNet.h:649
const_iterator_type gradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:604
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition: NeuralNet.h:608
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:592
std::vector< double > m_valueGradients
stores the gradients of the values (nodes)
Definition: NeuralNet.h:643
const_iterator_type m_itConstWeightBegin
const iterator to the first weight of this layer in the weight vector
Definition: NeuralNet.h:648
iterator_type valueGradientsEnd()
returns iterator to the end of the gradients of the node values
Definition: NeuralNet.h:598
void clear()
clear the values and the deltas
Definition: NeuralNet.h:576
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition: NeuralNet.h:607
container_type computeProbabilities() const
compute the probabilities from the node values
Definition: NeuralNet.cxx:140
const_iterator_type deltasEnd() const
returns const iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:595
bool m_hasDropOut
dropOut is turned on?
Definition: NeuralNet.h:646
bool m_isInputLayer
is this layer an input layer
Definition: NeuralNet.h:654
bool hasDropOut() const
has this layer drop-out turned on?
Definition: NeuralNet.h:622
const_iterator_type valueGradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:600
const_iterator_type valueGradientsEnd() const
returns const iterator to the end of the gradients
Definition: NeuralNet.h:601
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition: NeuralNet.h:589
void clearDropOut()
clear the drop-out-data for this layer
Definition: NeuralNet.h:620
ModeOutputValues m_eModeOutput
stores the output mode (DIRECT, SIGMOID, SOFTMAX)
Definition: NeuralNet.h:658
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
inverse activation function for this layer
Definition: NeuralNet.h:652
DropContainer::const_iterator const_dropout_iterator
Definition: NeuralNet.h:448
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition: NeuralNet.h:582
std::shared_ptr< std::function< double(double)> > m_activationFunction
activation function for this layer
Definition: NeuralNet.h:651
Layer defines the layout of a layer.
Definition: NeuralNet.h:673
void modeOutputValues(ModeOutputValues eModeOutputValues)
set the mode-output-value
Definition: NeuralNet.h:683
std::shared_ptr< std::function< double(double)> > m_activationFunction
stores the activation function
Definition: NeuralNet.h:696
std::shared_ptr< std::function< double(double)> > activationFunction() const
fetch the activation function for this layer
Definition: NeuralNet.h:688
size_t m_numNodes
Definition: NeuralNet.h:700
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
stores the inverse activation function
Definition: NeuralNet.h:697
size_t numNodes() const
return the number of nodes of this layer
Definition: NeuralNet.h:685
ModeOutputValues m_eModeOutputValues
do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned...
Definition: NeuralNet.h:702
size_t numWeights(size_t numInputNodes) const
return the number of weights for this layer (fully connected)
Definition: NeuralNet.h:686
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
fetch the inverse activation function for this layer
Definition: NeuralNet.h:689
EnumFunction m_activationFunctionType
Definition: NeuralNet.h:703
Layer(size_t numNodes, EnumFunction activationFunction, ModeOutputValues eModeOutputValues=ModeOutputValues::DIRECT)
c'tor for defining a Layer
Definition: NeuralNet.cxx:166
EnumFunction activationFunctionType() const
get the activation function type for this layer
Definition: NeuralNet.h:691
ModeOutputValues modeOutputValues() const
get the mode-output-value (direct, probabilities)
Definition: NeuralNet.h:682
double mean() const
Definition: NeuralNet.h:126
double var_corr() const
Definition: NeuralNet.h:136
void add(T value, double weight=1.0)
Definition: NeuralNet.h:93
double stdDev_corr() const
Definition: NeuralNet.h:144
double weights() const
Definition: NeuralNet.h:125
void add(ITERATOR itBegin, ITERATOR itEnd)
Definition: NeuralNet.h:116
double var() const
Definition: NeuralNet.h:127
double stdDev() const
Definition: NeuralNet.h:145
neural net
Definition: NeuralNet.h:1062
void setInputSize(size_t sizeInput)
set the input size of the DNN
Definition: NeuralNet.h:1092
std::vector< Layer > & layers()
returns the layers (structure)
Definition: NeuralNet.h:1246
void forwardBatch(const LayerContainer &_layers, LayerPatternContainer &layerPatternData, std::vector< double > &valuesMean, std::vector< double > &valuesStdDev, size_t trainFromLayer) const
Definition: NeuralNet.icc:1245
Net(const Net &other)
d'tor
Definition: NeuralNet.h:1085
bool * fExitFromTraining
Definition: NeuralNet.h:1277
std::vector< Layer > m_layers
layer-structure-data
Definition: NeuralNet.h:1272
UInt_t * fIPyMaxIter
Definition: NeuralNet.h:1278
void SetIpythonInteractive(IPythonInteractive *fI, bool *fE, UInt_t *M, UInt_t *C)
Definition: NeuralNet.h:1283
std::vector< double > compute(const std::vector< double > &input, const Weights &weights) const
compute the net with the given input and the given weights
Definition: NeuralNet.icc:1039
std::vector< double > container_type
Definition: NeuralNet.h:1065
container_type::iterator iterator_type
Definition: NeuralNet.h:1066
void preTrain(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
pre-training for future use
void fetchOutput(const LayerData &lastLayerData, OutputContainer &outputContainer) const
Definition: NeuralNet.icc:1300
size_t inputSize() const
input size of the DNN
Definition: NeuralNet.h:1098
std::pair< iterator_type, iterator_type > begin_end_type
Definition: NeuralNet.h:1067
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition: NeuralNet.h:1269
void addLayer(Layer &&layer)
Definition: NeuralNet.h:1095
size_t numNodes(size_t trainingStartLayer=0) const
returns the number of nodes in this net
Definition: NeuralNet.cxx:559
double train(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
start the training
Definition: NeuralNet.icc:712
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition: NeuralNet.h:1245
std::vector< std::vector< LayerData > > prepareLayerData(LayerContainer &layers, Batch &batch, const DropContainer &dropContainer, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t &totalNumWeights) const
Definition: NeuralNet.icc:1113
void setErrorFunction(ModeErrorFunction eErrorFunction)
which error function is to be used
Definition: NeuralNet.h:1096
void initializeWeights(WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
initialize the weights with the given strategy
Definition: NeuralNet.icc:1483
size_t outputSize() const
output size of the DNN
Definition: NeuralNet.h:1099
double errorFunction(LayerData &layerData, Container truth, ItWeight itWeight, ItWeight itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const
computes the error of the DNN
Definition: NeuralNet.icc:1592
double forward_backward(LayerContainer &layers, PassThrough &settingsAndBatch, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t trainFromLayer, OutContainer &outputContainer, bool fetchOutput) const
main NN computation function
Definition: NeuralNet.icc:1418
void removeLayer()
remove one layer
Definition: NeuralNet.h:1248
size_t m_sizeOutput
outut size of this DNN
Definition: NeuralNet.h:1271
size_t m_sizeInput
input size of this DNN
Definition: NeuralNet.h:1270
double trainCycle(Minimizer &minimizer, std::vector< double > &weights, Iterator itPatternBegin, Iterator itPatternEnd, Settings &settings, DropContainer &dropContainer)
executes one training cycle
Definition: NeuralNet.icc:941
UInt_t * fIPyCurrentIter
Definition: NeuralNet.h:1278
double operator()(PassThrough &settingsAndBatch, const Weights &weights) const
execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradient...
Definition: NeuralNet.icc:1072
void dropOutWeightFactor(WeightsType &weights, const DropProbabilities &drops, bool inverse=false)
set the drop out configuration
Definition: NeuralNet.icc:652
void fillDropContainer(DropContainer &dropContainer, double dropFraction, size_t numNodes) const
prepare the drop-out container (select the nodes which are to be dropped out)
Definition: NeuralNet.cxx:575
void addLayer(Layer &layer)
add a layer (layout)
Definition: NeuralNet.h:1094
size_t numWeights(size_t trainingStartLayer=0) const
returns the number of weights in this net
Definition: NeuralNet.cxx:543
IPythonInteractive * fInteractive
Definition: NeuralNet.h:1276
std::tuple< double, double > computeError(const Settings &settings, std::vector< LayerData > &lastLayerData, Batch &batch, ItWeight itWeightBegin, ItWeight itWeightEnd) const
Definition: NeuralNet.icc:1330
void setOutputSize(size_t sizeOutput)
set the output size of the DNN
Definition: NeuralNet.h:1093
void forwardPattern(const LayerContainer &_layers, std::vector< LayerData > &layerData) const
Definition: NeuralNet.icc:1225
void backPropagate(std::vector< std::vector< LayerData > > &layerPatternData, const Settings &settings, size_t trainFromLayer, size_t totalNumWeights) const
Definition: NeuralNet.icc:1367
Settings for the training of the neural net.
Definition: NeuralNet.h:730
size_t m_batchSize
mini-batch size
Definition: NeuralNet.h:838
void setDropOut(Iterator begin, Iterator end, size_t _dropRepetitions)
set the drop-out configuration (layer-wise)
Definition: NeuralNet.h:759
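Sketch of a layer-wise drop-out configuration (one fraction per layer; the values and the repetition count are illustrative only):
    std::vector<double> dropFractions{0.0, 0.5, 0.5};   // keep all input nodes, drop half of each hidden layer
    settings.setDropOut(dropFractions.begin(), dropFractions.end(), /*_dropRepetitions=*/7);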
void create(std::string histoName, int bins, double min, double max, int bins2, double min2, double max2)
for monitoring
Definition: NeuralNet.h:820
bool useMultithreading() const
is multithreading turned on?
Definition: NeuralNet.h:815
EnumRegularization regularization() const
is some regularization of the DNN turned on?
Definition: NeuralNet.h:813
size_t convergenceCount() const
returns the current convergence count
Definition: NeuralNet.h:827
double momentum() const
get the momentum (e.g. for SGD)
Definition: NeuralNet.h:772
Timer m_timer
timer for monitoring
Definition: NeuralNet.h:832
size_t testRepetitions() const
how often is the test data tested
Definition: NeuralNet.h:768
void clear(std::string histoName)
for monitoring
Definition: NeuralNet.h:824
virtual void endTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:805
MinimizerType fMinimizerType
Definition: NeuralNet.h:855
void addPoint(std::string histoName, double x, double y)
for monitoring
Definition: NeuralNet.h:822
void setMonitoring(std::shared_ptr< Monitoring > ptrMonitoring)
prepared for monitoring
Definition: NeuralNet.h:764
virtual void testIteration()
callback for monitoring and logging
Definition: NeuralNet.h:806
size_t m_convergenceSteps
number of steps without improvement to consider the DNN to have converged
Definition: NeuralNet.h:837
virtual bool hasConverged(double testError)
has this training converged already?
Definition: NeuralNet.cxx:488
MinimizerType minimizerType() const
which minimizer shall be used (e.g. SGD)
Definition: NeuralNet.h:774
std::vector< double > m_dropOut
Definition: NeuralNet.h:850
double m_minProgress
lower limit for the progress bar
Definition: NeuralNet.h:833
virtual void cycle(double progress, TString text)
Definition: NeuralNet.h:799
Settings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, TMVA::DNN::EnumRegularization _regularization=TMVA::DNN::EnumRegularization::NONE, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _multithreading=true)
c'tor
Definition: NeuralNet.cxx:232
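Sketch of constructing Settings with the most commonly tuned parameters spelled out (all remaining arguments keep the defaults shown above):
    TMVA::DNN::Settings settings("dnn",
                                 /*_convergenceSteps=*/15,   // cycles without improvement before convergence
                                 /*_batchSize=*/32,
                                 /*_testRepetitions=*/7,
                                 /*_factorWeightDecay=*/1e-5);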
virtual void setProgressLimits(double minProgress=0, double maxProgress=100)
Definition: NeuralNet.h:790
double m_maxProgress
upper limit for the progress bar
Definition: NeuralNet.h:834
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition: NeuralNet.h:788
virtual void drawSample(const std::vector< double > &, const std::vector< double > &, const std::vector< double > &, double)
callback for monitoring and logging
Definition: NeuralNet.h:807
double learningRate() const
get the learning rate
Definition: NeuralNet.h:771
double m_dropRepetitions
Definition: NeuralNet.h:849
const std::vector< double > & dropFractions() const
Definition: NeuralNet.h:762
void addPoint(std::string histoName, double x)
for monitoring
Definition: NeuralNet.h:821
virtual ~Settings()
d'tor
Definition: NeuralNet.cxx:261
size_t m_convergenceCount
Definition: NeuralNet.h:857
EnumRegularization m_regularization
Definition: NeuralNet.h:847
int repetitions() const
how many steps are taken before the batch is changed
Definition: NeuralNet.h:773
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition: NeuralNet.h:781
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition: NeuralNet.h:823
virtual void startTrainCycle()
Definition: NeuralNet.h:782
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition: NeuralNet.h:766
double m_factorWeightDecay
Definition: NeuralNet.h:840
double factorWeightDecay() const
get the weight-decay factor
Definition: NeuralNet.h:769
bool exists(std::string histoName)
for monitoring
Definition: NeuralNet.h:825
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition: NeuralNet.h:828
void pads(int numPads)
preparation for monitoring
Definition: NeuralNet.h:818
size_t m_testRepetitions
Definition: NeuralNet.h:839
size_t batchSize() const
mini-batch size
Definition: NeuralNet.h:767
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and logging
Definition: NeuralNet.h:809
std::shared_ptr< Monitoring > fMonitoring
Definition: NeuralNet.h:865
size_t dropRepetitions() const
Definition: NeuralNet.h:761
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition: NeuralNet.h:819
size_t minError() const
returns the smallest error so far
Definition: NeuralNet.h:829
virtual void startTraining()
Definition: NeuralNet.h:795
size_t m_maxConvergenceCount
Definition: NeuralNet.h:858
virtual void startTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:804
Steepest Gradient Descent algorithm (SGD)
Definition: NeuralNet.h:334
double m_beta
internal parameter (momentum)
Definition: NeuralNet.h:372
std::vector< double > m_localGradients
local gradients for reuse in thread.
Definition: NeuralNet.h:376
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition: NeuralNet.h:373
double m_alpha
internal parameter (learningRate)
Definition: NeuralNet.h:371
std::vector< double > m_localWeights
local weights for reuse in thread.
Definition: NeuralNet.h:375
double operator()(Function &fitnessFunction, Weights &weights, PassThrough &passThrough)
operator to call the steepest gradient descent algorithm
Definition: NeuralNet.icc:271
Steepest(double learningRate=1e-4, double momentum=0.5, size_t repetitions=10)
c'tor
Definition: NeuralNet.h:348
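Given the member documentation above (m_alpha = learning rate, m_beta = momentum, m_prevGradients = gradients of the previous step), the update follows the usual momentum scheme; schematically, per weight i (a sketch, not the literal code in NeuralNet.icc):
    // prevGradients[i] = beta * prevGradients[i] + alpha * gradients[i];
    // weights[i]      -= prevGradients[i];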
This class is needed by JsMVA; it is a helper class for tracking errors during the training in Jupyter notebooks.
Definition: MethodBase.h:94
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
void DrawProgressBar(Int_t, const TString &comment="")
draws the progress bar in color or B&W
Definition: Timer.cxx:202
Basic string class.
Definition: TString.h:136
TText * text
RooCmdArg Minimizer(const char *type, const char *alg=0)
const Double_t sigma
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
static double Q[]
static double C[]
double T(double x)
Definition: ChebyshevPol.h:34
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:150
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
double uniformDouble(double minValue, double maxValue)
Definition: NeuralNet.cxx:43
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
Definition: NeuralNet.icc:546
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
ModeOutputValues operator|(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:188
EnumRegularization
Definition: NeuralNet.h:173
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:412
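For reference, the error value accumulated by this function is the standard (pattern-weighted) binary cross entropy; a sketch of that term alone (the real implementation also fills the delta range for back-propagation):
    double E = 0;   // needs <cmath> for std::log
    for (size_t i = 0; i < probability.size(); ++i)
        E -= patternWeight * (truth[i] * std::log(probability[i])
                              + (1.0 - truth[i]) * std::log(1.0 - probability[i]));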
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
Definition: NeuralNet.icc:572
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
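Schematically, the returned value is the error plus a regularization term; a sketch assuming the conventional definitions (the exact normalization lives in NeuralNet.icc):
    // L1:   error + factorWeightDecay * sum_i |w_i|
    // L2:   error + factorWeightDecay * sum_i w_i * w_i / 2
    // NONE: error unchanged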
ModeOutputValues operator&=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:204
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:238
ModeErrorFunction
error functions to be chosen from
Definition: NeuralNet.h:1039
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max-cross-entropy error function (for mutual exclusive cross-entropy)
Definition: NeuralNet.icc:458
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition: NeuralNet.h:1050
ModeOutputValues operator|=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:193
MinimizerType
list all the minimizer types
Definition: NeuralNet.h:321
@ fSteepest
SGD.
Definition: NeuralNet.h:322
double gaussDouble(double mean, double sigma)
Definition: NeuralNet.cxx:35
ModeOutputValues operator&(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:199
ModeOutputValues
Definition: NeuralNet.h:179
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition: NeuralNet.h:1294
bool isFlagSet(T flag, T value)
Definition: NeuralNet.h:212
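ModeOutputValues is used as a bit-flag enum; the operators declared above allow combining and testing flags, for example:
    using TMVA::DNN::ModeOutputValues;
    ModeOutputValues mode = ModeOutputValues::SIGMOID | ModeOutputValues::BATCHNORMALIZATION;
    if (isFlagSet(ModeOutputValues::SIGMOID, mode))
        ;   // a sigmoid transformation of the output values is requested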
int randomInt(int maxValue)
Definition: NeuralNet.cxx:52
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
Definition: NeuralNet.icc:183
std::vector< char > DropContainer
Definition: NeuralNet.h:219
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)
static void output(int code)
Definition: gifencode.c:226