Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
NeuralNet.h
Go to the documentation of this file.
/**
 * @file TMVA/NeuralNet.h
 * @author Peter Speckmayer
 * @version 1.0
 *
 * @section LICENSE
 *
 *
 * @section Neural net implementation
 *
 * An implementation of a neural net for TMVA. This neural net uses multithreading.
 *
 */
14
15
//////////////////////////////////////////////////////////////////////////
//                                                                      //
// NeuralNet                                                            //
//                                                                      //
// A neural net implementation                                          //
//                                                                      //
//////////////////////////////////////////////////////////////////////////
23
#ifndef TMVA_NEURAL_NET
#define TMVA_NEURAL_NET
#pragma once

#include <vector>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <functional>
#include <tuple>
#include <cmath>
#include <cassert>
#include <random>
#include <thread>
#include <future>
#include <type_traits>
#include <string>
#include <utility>
#include <memory> // std::shared_ptr (used below; previously only available transitively)

#include "Pattern.h"
#include "Monitoring.h"

#include "TApplication.h"
#include "Timer.h"

#include "TH1F.h"
#include "TH2F.h"

#include <fenv.h> // turn on or off exceptions for NaN and other numeric exceptions
53
54
55namespace TMVA
56{
57
58 class IPythonInteractive;
59
60 namespace DNN
61 {
62
63 // double gaussDoubl (edouble mean, double sigma);
64
65
66
67 double gaussDouble (double mean, double sigma);
68 double uniformDouble (double minValue, double maxValue);
69 int randomInt (int maxValue);
70
71
72
73
75 {
76 public:
78 : m_n(0)
79 , m_sumWeights(0)
80 , m_mean(0)
81 , m_squared(0)
82 {}
83
84 inline void clear()
85 {
86 m_n = 0;
87 m_sumWeights = 0;
88 m_mean = 0;
89 m_squared = 0;
90 }
91
92 template <typename T>
93 inline void add(T value, double weight = 1.0)
94 {
95 ++m_n; // a value has been added
96
97 if (m_n == 1) // initialization
98 {
99 m_mean = value;
100 m_squared = 0.0;
101 m_sumWeights = weight;
102 return;
103 }
104
105 double tmpWeight = m_sumWeights+weight;
106 double Q = value - m_mean;
107
108 double R = Q*weight/tmpWeight;
109 m_mean += R;
111
112 m_sumWeights = tmpWeight;
113 }
114
115 template <typename ITERATOR>
116 inline void add (ITERATOR itBegin, ITERATOR itEnd)
117 {
118 for (ITERATOR it = itBegin; it != itEnd; ++it)
119 add (*it);
120 }
121
122
123
124 inline int count() const { return m_n; }
125 inline double weights() const { if(m_n==0) return 0; return m_sumWeights; }
126 inline double mean() const { if(m_n==0) return 0; return m_mean; }
127 inline double var() const
128 {
129 if(m_n==0)
130 return 0;
131 if (m_squared <= 0)
132 return 0;
133 return (m_squared/m_sumWeights);
134 }
135
136 inline double var_corr () const
137 {
138 if (m_n <= 1)
139 return var ();
140
141 return (var()*m_n/(m_n-1)); // unbiased for small sample sizes
142 }
143
144 inline double stdDev_corr () const { return sqrt( var_corr() ); }
145 inline double stdDev () const { return sqrt( var() ); } // unbiased for small sample sizes
146
147 private:
148 size_t m_n;
150 double m_mean;
151 double m_squared;
152 };
153
154
155
156 enum class EnumFunction
157 {
158 ZERO = '0',
159 LINEAR = 'L',
160 TANH = 'T',
161 RELU = 'R',
162 SYMMRELU = 'r',
163 TANHSHIFT = 't',
164 SIGMOID = 's',
165 SOFTSIGN = 'S',
166 GAUSS = 'G',
167 GAUSSCOMPLEMENT = 'C'
168 };
169
170
171
173 {
174 NONE, L1, L2, L1MAX
175 };
176
177
178 enum class ModeOutputValues : int
179 {
180 DIRECT = 0x01,
181 SIGMOID = 0x02,
182 SOFTMAX = 0x04,
183 BATCHNORMALIZATION = 0x08
184 };
185
186
187
189 {
190 return (ModeOutputValues)(static_cast<std::underlying_type<ModeOutputValues>::type>(lhs) | static_cast<std::underlying_type<ModeOutputValues>::type>(rhs));
191 }
192
194 {
195 lhs = (ModeOutputValues)(static_cast<std::underlying_type<ModeOutputValues>::type>(lhs) | static_cast<std::underlying_type<ModeOutputValues>::type>(rhs));
196 return lhs;
197 }
198
200 {
201 return (ModeOutputValues)(static_cast<std::underlying_type<ModeOutputValues>::type>(lhs) & static_cast<std::underlying_type<ModeOutputValues>::type>(rhs));
202 }
203
205 {
206 lhs = (ModeOutputValues)(static_cast<std::underlying_type<ModeOutputValues>::type>(lhs) & static_cast<std::underlying_type<ModeOutputValues>::type>(rhs));
207 return lhs;
208 }
209
210
211 template <typename T>
212 bool isFlagSet (T flag, T value)
213 {
214 return (int)(value & flag) != 0;
215 }
216
217
218
219 class Net;
220
221
222
223
224
225
226
227 typedef std::vector<char> DropContainer;
228
229
230 /*! \brief The Batch class encapsulates one mini-batch
231 *
232 * Holds a const_iterator to the beginning and the end of one batch in a vector of Pattern
233 */
234 class Batch
235 {
236 public:
237 typedef typename std::vector<Pattern>::const_iterator const_iterator;
238
239 Batch (typename std::vector<Pattern>::const_iterator itBegin, typename std::vector<Pattern>::const_iterator itEnd)
240 : m_itBegin (itBegin)
241 , m_itEnd (itEnd)
242 {}
243
244 const_iterator begin () const { return m_itBegin; }
245 const_iterator end () const { return m_itEnd; }
246
247 size_t size () const { return std::distance (begin (), end ()); }
248
249 private:
250 const_iterator m_itBegin; ///< iterator denoting the beginning of the batch
251 const_iterator m_itEnd; ///< iterator denoting the end of the batch
252 };
253
254
255
256
257
258
259 template <typename ItSource, typename ItWeight, typename ItTarget>
260 void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd);
261
262
263
264 template <typename ItSource, typename ItWeight, typename ItPrev>
265 void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd);
266
267
268
269
270
271 template <typename ItValue, typename ItFunction>
272 void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction);
273
274
275 template <typename ItValue, typename ItFunction, typename ItInverseFunction, typename ItGradient>
276 void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, ItGradient itGradient);
277
278
279
280 template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
281 void update (ItSource itSource, ItSource itSourceEnd,
282 ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
283 ItTargetGradient itTargetGradientBegin,
284 ItGradient itGradient);
285
286
287
288 template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
289 void update (ItSource itSource, ItSource itSourceEnd,
290 ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
291 ItTargetGradient itTargetGradientBegin,
292 ItGradient itGradient,
293 ItWeight itWeight, double weightDecay);
294
295
296
297 // ----- signature of a minimizer -------------
298 // class Minimizer
299 // {
300 // public:
301
302 // template <typename Function, typename Variables, typename PassThrough>
303 // double operator() (Function& fnc, Variables& vars, PassThrough& passThrough)
304 // {
305 // // auto itVars = begin (vars);
306 // // auto itVarsEnd = end (vars);
307
308 // std::vector<double> myweights;
309 // std::vector<double> gradients;
310
311 // double value = fnc (passThrough, myweights);
312 // value = fnc (passThrough, myweights, gradients);
313 // return value;
314 // }
315 // };
316
317
318
319 ///< list all the minimizer types
321 {
322 fSteepest ///< SGD
323 };
324
325
326
327
328
329 /*! \brief Steepest Gradient Descent algorithm (SGD)
330 *
331 * Implements a steepest gradient descent minimization algorithm
332 */
334 {
335 public:
336
338
339
340 /*! \brief c'tor
341 *
342 * C'tor
343 *
344 * \param learningRate denotes the learning rate for the SGD algorithm
345 * \param momentum fraction of the velocity which is taken over from the last step
346 * \param repetitions re-compute the gradients each "repetitions" steps
347 */
348 Steepest (double learningRate = 1e-4,
349 double momentum = 0.5,
350 size_t repetitions = 10)
351 : m_repetitions (repetitions)
352 , m_alpha (learningRate)
353 , m_beta (momentum)
354 {}
355
356 /*! \brief operator to call the steepest gradient descent algorithm
357 *
358 * entry point to start the minimization procedure
359 *
360 * \param fitnessFunction (templated) function which has to be provided. This function is minimized
361 * \param weights (templated) a reference to a container of weights. The result of the minimization procedure
362 * is returned via this reference (needs to support std::begin and std::end
363 * \param passThrough (templated) object which can hold any data which the fitness function needs. This object
364 * is not touched by the minimizer; This object is provided to the fitness function when
365 * called
366 */
367 template <typename Function, typename Weights, typename PassThrough>
368 double operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough);
369
370
371 double m_alpha; ///< internal parameter (learningRate)
372 double m_beta; ///< internal parameter (momentum)
373 std::vector<double> m_prevGradients; ///< vector remembers the gradients of the previous step
374
375 std::vector<double> m_localWeights; ///< local weights for reuse in thread.
376 std::vector<double> m_localGradients; ///< local gradients for reuse in thread.
377 };
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396 template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
397 double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
398
399
400
401 template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
402 double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
403
404
405
406
407 template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
408 double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
409
410
411
412
413
414 template <typename ItWeight>
415 double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization);
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430 /*! \brief LayerData holds the data of one layer
431 *
432 * LayerData holds the data of one layer, but not its layout
433 *
434 *
435 */
437 {
438 public:
439 typedef std::vector<double> container_type;
440
441 typedef container_type::iterator iterator_type;
442 typedef container_type::const_iterator const_iterator_type;
443
444 typedef std::vector<std::function<double(double)> > function_container_type;
445 typedef function_container_type::iterator function_iterator_type;
446 typedef function_container_type::const_iterator const_function_iterator_type;
447
448 typedef DropContainer::const_iterator const_dropout_iterator;
449
450 /*! \brief c'tor of LayerData
451 *
452 * C'tor of LayerData for the input layer
453 *
454 * \param itInputBegin iterator to the begin of a vector which holds the values of the nodes of the neural net
455 * \param itInputEnd iterator to the end of a vector which holdsd the values of the nodes of the neural net
456 * \param eModeOutput indicates a potential tranformation of the output values before further computation
457 * DIRECT does not further transformation; SIGMOID applies a sigmoid transformation to each
458 * output value (to create a probability); SOFTMAX applies a softmax transformation to all
459 * output values (mutually exclusive probability)
460 */
462
463
464 /*! \brief c'tor of LayerData
465 *
466 * C'tor of LayerData for the input layer
467 *
468 * \param inputSize input size of this layer
469 */
470 LayerData (size_t inputSize);
472
473
474 /*! \brief c'tor of LayerData
475 *
476 * C'tor of LayerData for all layers which are not the input layer; Used during the training of the DNN
477 *
478 * \param size size of the layer
479 * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
480 * \param itGradientBegin indicates the start of the gradients for this layer on the gradient vector
481 * \param activationFunction indicates activation functions for this layer
482 * \param inverseActivationFunction indicates the inverse activation functions for this layer
483 * \param eModeOutput indicates a potential tranformation of the output values before further computation
484 * DIRECT does not further transformation; SIGMOID applies a sigmoid transformation to each
485 * output value (to create a probability); SOFTMAX applies a softmax transformation to all
486 * output values (mutually exclusive probability)
487 */
488 LayerData (size_t size,
489 const_iterator_type itWeightBegin,
490 iterator_type itGradientBegin,
491 std::shared_ptr<std::function<double(double)>> activationFunction,
492 std::shared_ptr<std::function<double(double)>> inverseActivationFunction,
494
495 /*! \brief c'tor of LayerData
496 *
497 * C'tor of LayerData for all layers which are not the input layer; Used during the application of the DNN
498 *
499 * \param size size of the layer
500 * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
501 * \param activationFunction indicates the activation function for this layer
502 * \param eModeOutput indicates a potential tranformation of the output values before further computation
503 * DIRECT does not further transformation; SIGMOID applies a sigmoid transformation to each
504 * output value (to create a probability); SOFTMAX applies a softmax transformation to all
505 * output values (mutually exclusive probability)
506 */
507 LayerData (size_t size, const_iterator_type itWeightBegin,
508 std::shared_ptr<std::function<double(double)>> activationFunction,
510
511 /*! \brief copy c'tor of LayerData
512 *
513 *
514 */
515 LayerData (const LayerData& other)
516 : m_size (other.m_size)
518 , m_itInputEnd (other.m_itInputEnd)
519 , m_deltas (other.m_deltas)
521 , m_values (other.m_values)
522 , m_itDropOut (other.m_itDropOut)
523 , m_hasDropOut (other.m_hasDropOut)
529 , m_hasWeights (other.m_hasWeights)
532 {}
533
534 /*! \brief move c'tor of LayerData
535 *
536 *
537 */
539 : m_size (other.m_size)
541 , m_itInputEnd (other.m_itInputEnd)
542 , m_deltas (std::move(other.m_deltas))
543 , m_valueGradients (std::move(other.m_valueGradients))
544 , m_values (std::move(other.m_values))
545 , m_itDropOut (other.m_itDropOut)
546 , m_hasDropOut (other.m_hasDropOut)
549 , m_activationFunction (std::move(other.m_activationFunction))
552 , m_hasWeights (other.m_hasWeights)
555 {}
556
557
558 /*! \brief change the input iterators
559 *
560 *
561 * \param itInputBegin indicates the start of the input node vector
562 * \param itInputEnd indicates the end of the input node vector
563 *
564 */
565 void setInput (const_iterator_type itInputBegin, const_iterator_type itInputEnd)
566 {
567 m_isInputLayer = true;
568 m_itInputBegin = itInputBegin;
569 m_itInputEnd = itInputEnd;
570 }
571
572 /*! \brief clear the values and the deltas
573 *
574 *
575 */
576 void clear ()
577 {
578 m_values.assign (m_values.size (), 0.0);
579 m_deltas.assign (m_deltas.size (), 0.0);
580 }
581
582 const_iterator_type valuesBegin () const { return m_isInputLayer ? m_itInputBegin : begin (m_values); } ///< returns const iterator to the begin of the (node) values
583 const_iterator_type valuesEnd () const { return m_isInputLayer ? m_itInputEnd : end (m_values); } ///< returns iterator to the end of the (node) values
584
585 iterator_type valuesBegin () { assert (!m_isInputLayer); return begin (m_values); } ///< returns iterator to the begin of the (node) values
586 iterator_type valuesEnd () { assert (!m_isInputLayer); return end (m_values); } ///< returns iterator to the end of the (node) values
587
588 ModeOutputValues outputMode () const { return m_eModeOutput; } ///< returns the output mode
589 container_type probabilities () const { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them
590
591 iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation)
592 iterator_type deltasEnd () { return end (m_deltas); } ///< returns iterator to the end of the deltas (back-propagation)
593
594 const_iterator_type deltasBegin () const { return begin (m_deltas); } ///< returns const iterator to the begin of the deltas (back-propagation)
595 const_iterator_type deltasEnd () const { return end (m_deltas); } ///< returns const iterator to the end of the deltas (back-propagation)
596
597 iterator_type valueGradientsBegin () { return begin (m_valueGradients); } ///< returns iterator to the begin of the gradients of the node values
598 iterator_type valueGradientsEnd () { return end (m_valueGradients); } ///< returns iterator to the end of the gradients of the node values
599
600 const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const iterator to the begin of the gradients
601 const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); } ///< returns const iterator to the end of the gradients
602
603 iterator_type gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients
604 const_iterator_type gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients
605 const_iterator_type weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the weights for this layer
606
607 std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; }
608 std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; }
609
610 /*! \brief set the drop-out info for this layer
611 *
612 */
613 template <typename Iterator>
614 void setDropOut (Iterator itDrop) { m_itDropOut = itDrop; m_hasDropOut = true; }
615
616 /*! \brief clear the drop-out-data for this layer
617 *
618 *
619 */
620 void clearDropOut () { m_hasDropOut = false; }
621
622 bool hasDropOut () const { return m_hasDropOut; } ///< has this layer drop-out turned on?
623 const_dropout_iterator dropOut () const { assert (m_hasDropOut); return m_itDropOut; } ///< return the begin of the drop-out information
624
625 size_t size () const { return m_size; } ///< return the size of the layer
626
627 private:
628
629 /*! \brief compute the probabilities from the node values
630 *
631 *
632 */
634
635 private:
636
637 size_t m_size; ////< layer size
638
639 const_iterator_type m_itInputBegin; ///< iterator to the first of the nodes in the input node vector
640 const_iterator_type m_itInputEnd; ///< iterator to the end of the nodes in the input node vector
641
642 std::vector<double> m_deltas; ///< stores the deltas for the DNN training
643 std::vector<double> m_valueGradients; ///< stores the gradients of the values (nodes)
644 std::vector<double> m_values; ///< stores the values of the nodes in this layer
645 const_dropout_iterator m_itDropOut; ///< iterator to a container indicating if the corresponding node is to be dropped
646 bool m_hasDropOut; ///< dropOut is turned on?
647
648 const_iterator_type m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector
649 iterator_type m_itGradientBegin; ///< iterator to the first gradient of this layer in the gradient vector
650
651 std::shared_ptr<std::function<double(double)>> m_activationFunction; ///< activation function for this layer
652 std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< inverse activation function for this layer
653
654 bool m_isInputLayer; ///< is this layer an input layer
655 bool m_hasWeights; ///< does this layer have weights (it does not if it is the input layer)
656 bool m_hasGradients; ///< does this layer have gradients (only if in training mode)
657
658 ModeOutputValues m_eModeOutput; ///< stores the output mode (DIRECT, SIGMOID, SOFTMAX)
659
660 };
661
662
663
664
665
666 /*! \brief Layer defines the layout of a layer
667 *
668 * Layer defines the layout of a specific layer in the DNN
669 * Objects of this class don't hold the layer data itself (see class "LayerData")
670 *
671 */
672 class Layer
673 {
674 public:
675
676 /*! \brief c'tor for defining a Layer
677 *
678 *
679 */
681
682 ModeOutputValues modeOutputValues () const { return m_eModeOutputValues; } ///< get the mode-output-value (direct, probabilities)
683 void modeOutputValues (ModeOutputValues eModeOutputValues) { m_eModeOutputValues = eModeOutputValues; } ///< set the mode-output-value
684
685 size_t numNodes () const { return m_numNodes; } ///< return the number of nodes of this layer
686 size_t numWeights (size_t numInputNodes) const { return numInputNodes * numNodes (); } ///< return the number of weights for this layer (fully connected)
687
688 std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; } ///< fetch the activation function for this layer
689 std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; } ///< fetch the inverse activation function for this layer
690
691 EnumFunction activationFunctionType () const { return m_activationFunctionType; } ///< get the activation function type for this layer
692
693 private:
694
695
696 std::shared_ptr<std::function<double(double)>> m_activationFunction; ///< stores the activation function
697 std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< stores the inverse activation function
698
699
701
702 ModeOutputValues m_eModeOutputValues; ///< do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned as such
704
705 friend class Net;
706 };
707
708
709
710
711
712 template <typename LAYERDATA>
713 void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData);
714
715
716 template <typename LAYERDATA>
717 void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData);
718
719
720 template <typename LAYERDATA>
721 void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization);
722
723
724
725 /*! \brief Settings for the training of the neural net
726 *
727 *
728 */
730 {
731 public:
732
733 /*! \brief c'tor
734 *
735 *
736 */
738 size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
739 double _factorWeightDecay = 1e-5, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE,
740 MinimizerType _eMinimizerType = MinimizerType::fSteepest,
741 double _learningRate = 1e-5, double _momentum = 0.3,
742 int _repetitions = 3,
743 bool _multithreading = true);
744
745 /*! \brief d'tor
746 *
747 *
748 */
749 virtual ~Settings ();
750
751
752 /*! \brief set the drop-out configuration (layer-wise)
753 *
754 * \param begin begin of an array or vector denoting the drop-out probabilities for each layer
755 * \param end end of an array or vector denoting the drop-out probabilities for each layer
756 * \param _dropRepetitions denotes after how many repetitions the drop-out setting (which nodes are dropped out exactly) is changed
757 */
758 template <typename Iterator>
759 void setDropOut (Iterator begin, Iterator end, size_t _dropRepetitions) { m_dropOut.assign (begin, end); m_dropRepetitions = _dropRepetitions; }
760
761 size_t dropRepetitions () const { return m_dropRepetitions; }
762 const std::vector<double>& dropFractions () const { return m_dropOut; }
763
764 void setMonitoring (std::shared_ptr<Monitoring> ptrMonitoring) { fMonitoring = ptrMonitoring; } ///< prepared for monitoring
765
766 size_t convergenceSteps () const { return m_convergenceSteps; } ///< how many steps until training is deemed to have converged
767 size_t batchSize () const { return m_batchSize; } ///< mini-batch size
768 size_t testRepetitions () const { return m_testRepetitions; } ///< how often is the test data tested
769 double factorWeightDecay () const { return m_factorWeightDecay; } ///< get the weight-decay factor
770
771 double learningRate () const { return fLearningRate; } ///< get the learning rate
772 double momentum () const { return fMomentum; } ///< get the momentum (e.g. for SGD)
773 int repetitions () const { return fRepetitions; } ///< how many steps have to be gone until the batch is changed
774 MinimizerType minimizerType () const { return fMinimizerType; } ///< which minimizer shall be used (e.g. SGD)
775
776
777
778
779
780
781 virtual void testSample (double /*error*/, double /*output*/, double /*target*/, double /*weight*/) {} ///< virtual function to be used for monitoring (callback)
782 virtual void startTrainCycle () ///< callback for monitoring and logging
783 {
786 m_minError = 1e10;
787 }
788 virtual void endTrainCycle (double /*error*/) {} ///< callback for monitoring and logging
789
790 virtual void setProgressLimits (double minProgress = 0, double maxProgress = 100) ///< for monitoring and logging (set the current "progress" limits for the display of the progress)
791 {
792 m_minProgress = minProgress;
793 m_maxProgress = maxProgress;
794 }
795 virtual void startTraining () ///< start drawing the progress bar
796 {
798 }
799 virtual void cycle (double progress, TString text) ///< advance on the progress bar
800 {
802 }
803
804 virtual void startTestCycle () {} ///< callback for monitoring and loggging
805 virtual void endTestCycle () {} ///< callback for monitoring and loggging
806 virtual void testIteration () {} ///< callback for monitoring and loggging
807 virtual void drawSample (const std::vector<double>& /*input*/, const std::vector<double>& /* output */, const std::vector<double>& /* target */, double /* patternWeight */) {} ///< callback for monitoring and loggging
808
809 virtual void computeResult (const Net& /* net */, std::vector<double>& /* weights */) {} ///< callback for monitoring and loggging
810
811 virtual bool hasConverged (double testError); ///< has this training converged already?
812
813 EnumRegularization regularization () const { return m_regularization; } ///< some regularization of the DNN is turned on?
814
815 bool useMultithreading () const { return m_useMultithreading; } ///< is multithreading turned on?
816
817
818 void pads (int numPads) { if (fMonitoring) fMonitoring->pads (numPads); } ///< preparation for monitoring
819 void create (std::string histoName, int bins, double min, double max) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max); } ///< for monitoring
820 void create (std::string histoName, int bins, double min, double max, int bins2, double min2, double max2) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max, bins2, min2, max2); } ///< for monitoring
821 void addPoint (std::string histoName, double x) { if (fMonitoring) fMonitoring->addPoint (histoName, x); } ///< for monitoring
822 void addPoint (std::string histoName, double x, double y) {if (fMonitoring) fMonitoring->addPoint (histoName, x, y); } ///< for monitoring
823 void plot (std::string histoName, std::string options, int pad, EColor color) { if (fMonitoring) fMonitoring->plot (histoName, options, pad, color); } ///< for monitoring
824 void clear (std::string histoName) { if (fMonitoring) fMonitoring->clear (histoName); } ///< for monitoring
825 bool exists (std::string histoName) { if (fMonitoring) return fMonitoring->exists (histoName); return false; } ///< for monitoring
826
827 size_t convergenceCount () const { return m_convergenceCount; } ///< returns the current convergence count
828 size_t maxConvergenceCount () const { return m_maxConvergenceCount; } ///< returns the max convergence count so far
829 size_t minError () const { return m_minError; } ///< returns the smallest error so far
830
831 public:
832 Timer m_timer; ///< timer for monitoring
833 double m_minProgress; ///< current limits for the progress bar
834 double m_maxProgress; ///< current limits for the progress bar
835
836
837 size_t m_convergenceSteps; ///< number of steps without improvement to consider the DNN to have converged
838 size_t m_batchSize; ///< mini-batch size
841
842 size_t count_E;
843 size_t count_dE;
846
848
850 std::vector<double> m_dropOut;
851
853 double fMomentum;
856
860
861
862 protected:
864
865 std::shared_ptr<Monitoring> fMonitoring;
866 };
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890 /*! \brief Settings for classification
891 *
892 * contains additional settings if the DNN problem is classification
893 */
895 {
896 public:
897 /*! \brief c'tor
898 *
899 *
900 */
902 size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
903 double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE,
904 size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest,
905 double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3,
906 bool _useMultithreading = true)
907 : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay,
908 _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading)
909 , m_ams ()
912 , m_scaleToNumEvents (_scaleToNumEvents)
913 , m_cutValue (10.0)
917 {
918 }
919
920 /*! \brief d'tor
921 *
922 *
923 */
925 {
926 }
927
928 void startTrainCycle ();
929 void endTrainCycle (double /*error*/);
930 void testIteration () { if (fMonitoring) fMonitoring->ProcessEvents (); }
931
932
933 /* void createHistograms () */
934 /* { */
935 /* std::cout << "is hist ROC existing?" << std::endl; */
936 /* if (m_histROC) */
937 /* { */
938 /* std::cout << "--> yes" << std::endl; */
939 /* fMonitoring->ProcessEvents (); */
940 /* return; */
941 /* } */
942
943 /* std::cout << "create histograms" << std::endl; */
944 /* TCanvas* canvas = fMonitoring->GetCanvas (); */
945 /* if (canvas) */
946 /* { */
947 /* std::cout << "canvas divide" << std::endl; */
948 /* canvas->cd (); */
949 /* canvas->Divide (2,2); */
950 /* } */
951 /* if (!m_histROC) */
952 /* { */
953 /* m_histROC = new TH2F ("ROC","ROC", 1000, 0, 1.0, 1000, 0, 1.0); m_histROC->SetDirectory (0); */
954 /* m_histROC->SetLineColor (kBlue); */
955 /* } */
956 /* if (!m_histSignificance) */
957 /* { */
958 /* m_histSignificance = new TH2F ("Significance", "Significance", 1000, 0,1.0, 5, 0.0, 2.0); */
959 /* m_histSignificance->SetDirectory (0); */
960 /* m_histSignificance->SetBit (TH1::kCanRebin); */
961 /* m_histROC->SetLineColor (kRed); */
962 /* } */
963 /* if (!m_histError) */
964 /* { */
965 /* m_histError = new TH1F ("Error", "Error", 100, 0, 100); */
966 /* m_histError->SetDirectory (0); */
967 /* m_histError->SetBit (TH1::kCanRebin); */
968 /* m_histROC->SetLineColor (kGreen); */
969 /* } */
970 /* if (!m_histOutputSignal) */
971 /* { */
972 /* m_histOutputSignal = new TH1F ("Signal", "Signal", 100, 0, 1.0); */
973 /* m_histOutputSignal->SetDirectory (0); */
974 /* m_histOutputSignal->SetBit (TH1::kCanRebin); */
975 /* } */
976 /* if (!m_histOutputBackground) */
977 /* { */
978 /* m_histOutputBackground = new TH1F ("Background", "Background", 100, 0, 1.0); */
979 /* m_histOutputBackground->SetDirectory (0); */
980 /* m_histOutputBackground->SetBit (TH1::kCanRebin); */
981 /* } */
982
983 /* fMonitoring->ProcessEvents (); */
984 /* } */
985
986 void testSample (double error, double output, double target, double weight);
987
988 virtual void startTestCycle ();
989 virtual void endTestCycle ();
990
991
992 void setWeightSums (double sumOfSigWeights, double sumOfBkgWeights);
993 void setResultComputation (std::string _fileNameNetConfig, std::string _fileNameResult, std::vector<Pattern>* _resultPatternContainer);
994
995 std::vector<double> m_input;
996 std::vector<double> m_output;
997 std::vector<double> m_targets;
998 std::vector<double> m_weights;
999
1000 std::vector<double> m_ams;
1001 std::vector<double> m_significances;
1002
1003
1007
1009 std::vector<Pattern>* m_pResultPatternContainer;
1010 std::string m_fileNameResult;
1012
1013
1014 /* TH2F* m_histROC; */
1015 /* TH2F* m_histSignificance; */
1016
1017 /* TH1F* m_histError; */
1018 /* TH1F* m_histOutputSignal; */
1019 /* TH1F* m_histOutputBackground; */
1020 };
1021
1022
1023
1024
1025
1026
1027
/// used to distinguish between different function signatures
enum class ModeOutput
{
    FETCH ///< fetch the output values of the net after computation
};
1033
/*! \brief error functions to be chosen from
 *
 *
 */
enum class ModeErrorFunction
{
    SUMOFSQUARES = 'S',
    CROSSENTROPY = 'C',
    // reconstructed: this enumerator line was lost in the extraction of this
    // listing (cf. softMaxCrossEntropy, "mutual exclusive cross-entropy");
    // confirm against the repository version
    CROSSENTROPY_MUTUALEXCLUSIVE = 'M'
};
1044
/*! \brief weight initialization strategies to be chosen from
 *
 *
 */
enum class WeightInitializationStrategy
{
    // reconstructed: the enumerator line was lost in the extraction of this
    // listing; confirm against the repository version
    XAVIER, TEST, LAYERSIZE, XAVIERUNIFORM
};
1053
1054
1055
1056 /*! \brief neural net
1057 *
1058 * holds the structure of all layers and some data for the whole net
1059 * does not know the layer data though (i.e. values of the nodes and weights)
1060 */
1061 class Net
1062 {
1063 public:
1064
1065 typedef std::vector<double> container_type;
1066 typedef container_type::iterator iterator_type;
1067 typedef std::pair<iterator_type,iterator_type> begin_end_type;
1068
1069
1070 /*! \brief c'tor
1071 *
1072 *
1073 */
1076 , m_sizeInput (0)
1077 , m_layers ()
1078 {
1079 }
1080
1081 /*! \brief d'tor
1082 *
1083 *
1084 */
1085 Net (const Net& other)
1087 , m_sizeInput (other.m_sizeInput)
1088 , m_layers (other.m_layers)
1089 {
1090 }
1091
1092 void setInputSize (size_t sizeInput) { m_sizeInput = sizeInput; } ///< set the input size of the DNN
1093 void setOutputSize (size_t sizeOutput) { m_sizeOutput = sizeOutput; } ///< set the output size of the DNN
1094 void addLayer (Layer& layer) { m_layers.push_back (layer); } ///< add a layer (layout)
1095 void addLayer (Layer&& layer) { m_layers.push_back (layer); }
1096 void setErrorFunction (ModeErrorFunction eErrorFunction) { m_eErrorFunction = eErrorFunction; } ///< which error function is to be used
1097
1098 size_t inputSize () const { return m_sizeInput; } ///< input size of the DNN
1099 size_t outputSize () const { return m_sizeOutput; } ///< output size of the DNN
1100
1101 /*! \brief set the drop out configuration
1102 *
1103 *
1104 */
1105 template <typename WeightsType, typename DropProbabilities>
1106 void dropOutWeightFactor (WeightsType& weights,
1107 const DropProbabilities& drops,
1108 bool inverse = false);
1109
1110 /*! \brief start the training
1111 *
1112 * \param weights weight vector
1113 * \param trainPattern training pattern
1114 * \param testPattern test pattern
1115 * \param minimizer use this minimizer for training (e.g. SGD)
1116 * \param settings settings used for this training run
1117 */
1118 template <typename Minimizer>
1119 double train (std::vector<double>& weights,
1120 std::vector<Pattern>& trainPattern,
1121 const std::vector<Pattern>& testPattern,
1122 Minimizer& minimizer,
1123 Settings& settings);
1124
1125 /*! \brief pre-training for future use
1126 *
1127 *
1128 */
1129 template <typename Minimizer>
1130 void preTrain (std::vector<double>& weights,
1131 std::vector<Pattern>& trainPattern,
1132 const std::vector<Pattern>& testPattern,
1133 Minimizer& minimizer, Settings& settings);
1134
1135
1136 /*! \brief executes one training cycle
1137 *
1138 * \param minimizer the minimizer to be used
1139 * \param weights the weight vector to be used
1140 * \param itPatternBegin the pattern to be trained with
1141 * \param itPatternEnd the pattern to be trainied with
1142 * \param settings the settings for the training
1143 * \param dropContainer the configuration for DNN drop-out
1144 */
1145 template <typename Iterator, typename Minimizer>
1146 inline double trainCycle (Minimizer& minimizer, std::vector<double>& weights,
1147 Iterator itPatternBegin, Iterator itPatternEnd,
1148 Settings& settings,
1149 DropContainer& dropContainer);
1150
1151 size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net
1152 size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net
1153
1154 template <typename Weights>
1155 std::vector<double> compute (const std::vector<double>& input, const Weights& weights) const; ///< compute the net with the given input and the given weights
1156
1157 template <typename Weights, typename PassThrough>
1158 double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients
1159
1160 template <typename Weights, typename PassThrough, typename OutContainer>
1161 double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function
1162
1163 template <typename Weights, typename Gradients, typename PassThrough>
1164 double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well
1165
1166 template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1167 double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const;
1168
1169
1170 template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1171 std::vector<std::vector<LayerData>> prepareLayerData (LayerContainer& layers,
1172 Batch& batch,
1173 const DropContainer& dropContainer,
1174 ItWeight itWeightBegin,
1175 ItWeight itWeightEnd,
1176 ItGradient itGradientBegin,
1177 ItGradient itGradientEnd,
1178 size_t& totalNumWeights) const;
1179
1180 template <typename LayerContainer>
1181 void forwardPattern (const LayerContainer& _layers,
1182 std::vector<LayerData>& layerData) const;
1183
1184
1185 template <typename LayerContainer, typename LayerPatternContainer>
1186 void forwardBatch (const LayerContainer& _layers,
1187 LayerPatternContainer& layerPatternData,
1188 std::vector<double>& valuesMean,
1189 std::vector<double>& valuesStdDev,
1190 size_t trainFromLayer) const;
1191
1192 template <typename OutputContainer>
1193 void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const;
1194
1195 template <typename OutputContainer>
1196 void fetchOutput (const std::vector<LayerData>& layerPatternData, OutputContainer& outputContainer) const;
1197
1198
1199 template <typename ItWeight>
1200 std::tuple</*sumError*/double,/*sumWeights*/double> computeError (const Settings& settings,
1201 std::vector<LayerData>& lastLayerData,
1202 Batch& batch,
1203 ItWeight itWeightBegin,
1204 ItWeight itWeightEnd) const;
1205
1206 template <typename Settings>
1207 void backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
1208 const Settings& settings,
1209 size_t trainFromLayer,
1210 size_t totalNumWeights) const;
1211
1212
1213
1214 /*! \brief main NN computation function
1215 *
1216 *
1217 */
1218 template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
1219 double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch,
1220 ItWeight itWeightBegin, ItWeight itWeightEnd,
1221 ItGradient itGradientBegin, ItGradient itGradientEnd,
1222 size_t trainFromLayer,
1223 OutContainer& outputContainer, bool fetchOutput) const;
1224
1225
1226
1227 double E ();
1228 void dE ();
1229
1230
1231 /*! \brief computes the error of the DNN
1232 *
1233 *
1234 */
1235 template <typename Container, typename ItWeight>
1236 double errorFunction (LayerData& layerData,
1237 Container truth,
1238 ItWeight itWeight,
1239 ItWeight itWeightEnd,
1240 double patternWeight,
1241 double factorWeightDecay,
1242 EnumRegularization eRegularization) const;
1243
1244
1245 const std::vector<Layer>& layers () const { return m_layers; } ///< returns the layers (structure)
1246 std::vector<Layer>& layers () { return m_layers; } ///< returns the layers (structure)
1247
1248 void removeLayer () { m_layers.pop_back (); } ///< remove one layer
1249
1250
1251 void clear () ///< clear one layer
1252 {
1253 m_layers.clear ();
1255 }
1256
1257
1258 template <typename OutIterator>
1260 OutIterator itWeight); ///< initialize the weights with the given strategy
1261
1262 protected:
1263
1264 void fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const; ///< prepare the drop-out-container (select the nodes which are to be dropped out)
1265
1266
1267 private:
1268
1269 ModeErrorFunction m_eErrorFunction; ///< denotes the error function
1270 size_t m_sizeInput; ///< input size of this DNN
1271 size_t m_sizeOutput; ///< outut size of this DNN
1272 std::vector<Layer> m_layers; ///< layer-structure-data
1273
1274 protected:
1275 // variables for JsMVA (interactive training in jupyter notebook)
1277 bool * fExitFromTraining = nullptr;
1278 UInt_t *fIPyMaxIter = nullptr, *fIPyCurrentIter = nullptr;
1279
1280 public:
1281
1282 // setup ipython interactive variables
1284 fInteractive = fI;
1285 fExitFromTraining = fE;
1286 fIPyMaxIter = M;
1287 fIPyCurrentIter = C;
1288 }
1289 };
1290
1291
1292
1293
1294typedef std::tuple<Settings&, Batch&, DropContainer&> pass_through_type;
1295
1296
1297
1298
1299
1300
1301
1302 } // namespace DNN
1303} // namespace TMVA
1304
1305
1306// include the implementations (in header file, because they are templated)
1307#include "TMVA/NeuralNet.icc"
1308
1309#endif
1310
double
#define R(a, b, c, d, e, f, g, h, i)
Definition RSha256.hxx:110
#define e(i)
Definition RSha256.hxx:103
int Int_t
Definition RtypesCore.h:45
EColor
Definition Rtypes.h:65
char name[80]
Definition TGX11.cxx:110
double sqrt(double)
Double_t(* Function)(Double_t)
Definition Functor.C:4
The Batch class encapsulates one mini-batch.
Definition NeuralNet.h:235
const_iterator m_itEnd
iterator denoting the end of the batch
Definition NeuralNet.h:251
const_iterator begin() const
Definition NeuralNet.h:244
const_iterator end() const
Definition NeuralNet.h:245
Batch(typename std::vector< Pattern >::const_iterator itBegin, typename std::vector< Pattern >::const_iterator itEnd)
Definition NeuralNet.h:239
size_t size() const
Definition NeuralNet.h:247
std::vector< Pattern >::const_iterator const_iterator
Definition NeuralNet.h:237
const_iterator m_itBegin
iterator denoting the beginning of the batch
Definition NeuralNet.h:250
Settings for classification; used to distinguish between different function signatures.
Definition NeuralNet.h:895
void startTrainCycle()
action to be done when the training cycle is started (e.g.
void testIteration()
callback for monitoring and logging
Definition NeuralNet.h:930
void endTrainCycle(double)
action to be done when the training cycle is ended (e.g.
virtual void endTestCycle()
action to be done when the training cycle is ended (e.g.
std::vector< Pattern > * m_pResultPatternContainer
Definition NeuralNet.h:1009
void setResultComputation(std::string _fileNameNetConfig, std::string _fileNameResult, std::vector< Pattern > *_resultPatternContainer)
preparation for monitoring output
ClassificationSettings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, EnumRegularization _regularization=EnumRegularization::NONE, size_t _scaleToNumEvents=0, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _useMultithreading=true)
c'tor
Definition NeuralNet.h:901
std::vector< double > m_input
Definition NeuralNet.h:995
std::vector< double > m_significances
Definition NeuralNet.h:1001
std::vector< double > m_weights
Definition NeuralNet.h:998
virtual ~ClassificationSettings()
d'tor
Definition NeuralNet.h:924
std::vector< double > m_targets
Definition NeuralNet.h:997
void testSample(double error, double output, double target, double weight)
action to be done after the computation of a test sample (e.g.
virtual void startTestCycle()
action to be done when the test cycle is started (e.g.
void setWeightSums(double sumOfSigWeights, double sumOfBkgWeights)
set the weight sums to be scaled to (preparations for monitoring output)
std::vector< double > m_ams
Definition NeuralNet.h:1000
std::vector< double > m_output
Definition NeuralNet.h:996
LayerData holds the data of one layer.
Definition NeuralNet.h:437
const_iterator_type m_itInputBegin
iterator to the first of the nodes in the input node vector
Definition NeuralNet.h:639
const_iterator_type deltasBegin() const
returns const iterator to the begin of the deltas (back-propagation)
Definition NeuralNet.h:594
iterator_type valuesBegin()
returns iterator to the begin of the (node) values
Definition NeuralNet.h:585
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition NeuralNet.h:583
bool m_hasGradients
does this layer have gradients (only if in training mode)
Definition NeuralNet.h:656
std::vector< double > m_deltas
stores the deltas for the DNN training
Definition NeuralNet.h:642
container_type::iterator iterator_type
Definition NeuralNet.h:441
void setDropOut(Iterator itDrop)
set the drop-out info for this layer
Definition NeuralNet.h:614
void setInput(const_iterator_type itInputBegin, const_iterator_type itInputEnd)
change the input iterators
Definition NeuralNet.h:565
std::vector< std::function< double(double)> > function_container_type
Definition NeuralNet.h:444
iterator_type valuesEnd()
returns iterator to the end of the (node) values
Definition NeuralNet.h:586
const_dropout_iterator m_itDropOut
iterator to a container indicating if the corresponding node is to be dropped
Definition NeuralNet.h:645
iterator_type valueGradientsBegin()
returns iterator to the begin of the gradients of the node values
Definition NeuralNet.h:597
iterator_type gradientsBegin()
returns iterator to the begin of the gradients
Definition NeuralNet.h:603
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition NeuralNet.h:591
bool m_hasWeights
does this layer have weights (it does not if it is the input layer)
Definition NeuralNet.h:655
const_dropout_iterator dropOut() const
return the begin of the drop-out information
Definition NeuralNet.h:623
LayerData(LayerData &&other)
move c'tor of LayerData
Definition NeuralNet.h:538
std::vector< double > container_type
Definition NeuralNet.h:439
size_t size() const
return the size of the layer
Definition NeuralNet.h:625
const_iterator_type weightsBegin() const
returns const iterator to the begin of the weights for this layer
Definition NeuralNet.h:605
function_container_type::const_iterator const_function_iterator_type
Definition NeuralNet.h:446
LayerData(const LayerData &other)
copy c'tor of LayerData
Definition NeuralNet.h:515
function_container_type::iterator function_iterator_type
Definition NeuralNet.h:445
std::vector< double > m_values
stores the values of the nodes in this layer
Definition NeuralNet.h:644
const_iterator_type m_itInputEnd
iterator to the end of the nodes in the input node vector
Definition NeuralNet.h:640
container_type::const_iterator const_iterator_type
Definition NeuralNet.h:442
ModeOutputValues outputMode() const
returns the output mode
Definition NeuralNet.h:588
iterator_type m_itGradientBegin
iterator to the first gradient of this layer in the gradient vector
Definition NeuralNet.h:649
const_iterator_type gradientsBegin() const
returns const iterator to the begin of the gradients
Definition NeuralNet.h:604
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition NeuralNet.h:608
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition NeuralNet.h:592
std::vector< double > m_valueGradients
stores the gradients of the values (nodes)
Definition NeuralNet.h:643
const_iterator_type m_itConstWeightBegin
const iterator to the first weight of this layer in the weight vector
Definition NeuralNet.h:648
iterator_type valueGradientsEnd()
returns iterator to the end of the gradients of the node values
Definition NeuralNet.h:598
void clear()
clear the values and the deltas
Definition NeuralNet.h:576
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition NeuralNet.h:607
container_type computeProbabilities() const
compute the probabilities from the node values
const_iterator_type deltasEnd() const
returns const iterator to the end of the deltas (back-propagation)
Definition NeuralNet.h:595
bool m_hasDropOut
dropOut is turned on?
Definition NeuralNet.h:646
bool m_isInputLayer
is this layer an input layer
Definition NeuralNet.h:654
bool hasDropOut() const
has this layer drop-out turned on?
Definition NeuralNet.h:622
const_iterator_type valueGradientsBegin() const
returns const iterator to the begin of the gradients
Definition NeuralNet.h:600
const_iterator_type valueGradientsEnd() const
returns const iterator to the end of the gradients
Definition NeuralNet.h:601
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition NeuralNet.h:589
void clearDropOut()
clear the drop-out-data for this layer
Definition NeuralNet.h:620
ModeOutputValues m_eModeOutput
stores the output mode (DIRECT, SIGMOID, SOFTMAX)
Definition NeuralNet.h:658
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
inverse activation function for this layer
Definition NeuralNet.h:652
DropContainer::const_iterator const_dropout_iterator
Definition NeuralNet.h:448
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition NeuralNet.h:582
std::shared_ptr< std::function< double(double)> > m_activationFunction
activation function for this layer
Definition NeuralNet.h:651
Layer defines the layout of a layer.
Definition NeuralNet.h:673
void modeOutputValues(ModeOutputValues eModeOutputValues)
set the mode-output-value
Definition NeuralNet.h:683
std::shared_ptr< std::function< double(double)> > m_activationFunction
stores the activation function
Definition NeuralNet.h:696
std::shared_ptr< std::function< double(double)> > activationFunction() const
fetch the activation function for this layer
Definition NeuralNet.h:688
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
stores the inverse activation function
Definition NeuralNet.h:697
size_t numNodes() const
return the number of nodes of this layer
Definition NeuralNet.h:685
ModeOutputValues m_eModeOutputValues
do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned...
Definition NeuralNet.h:702
size_t numWeights(size_t numInputNodes) const
return the number of weights for this layer (fully connected)
Definition NeuralNet.h:686
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
fetch the inverse activation function for this layer
Definition NeuralNet.h:689
EnumFunction m_activationFunctionType
Definition NeuralNet.h:703
EnumFunction activationFunctionType() const
get the activation function type for this layer
Definition NeuralNet.h:691
ModeOutputValues modeOutputValues() const
get the mode-output-value (direct, probabilities)
Definition NeuralNet.h:682
double mean() const
Definition NeuralNet.h:126
double var_corr() const
Definition NeuralNet.h:136
void add(T value, double weight=1.0)
Definition NeuralNet.h:93
double stdDev_corr() const
Definition NeuralNet.h:144
double weights() const
Definition NeuralNet.h:125
void add(ITERATOR itBegin, ITERATOR itEnd)
Definition NeuralNet.h:116
double var() const
Definition NeuralNet.h:127
double stdDev() const
Definition NeuralNet.h:145
neural net
Definition NeuralNet.h:1062
void setInputSize(size_t sizeInput)
set the input size of the DNN
Definition NeuralNet.h:1092
std::vector< Layer > & layers()
returns the layers (structure)
Definition NeuralNet.h:1246
void forwardBatch(const LayerContainer &_layers, LayerPatternContainer &layerPatternData, std::vector< double > &valuesMean, std::vector< double > &valuesStdDev, size_t trainFromLayer) const
Net(const Net &other)
d'tor
Definition NeuralNet.h:1085
bool * fExitFromTraining
Definition NeuralNet.h:1277
std::vector< Layer > m_layers
layer-structure-data
Definition NeuralNet.h:1272
UInt_t * fIPyMaxIter
Definition NeuralNet.h:1278
void SetIpythonInteractive(IPythonInteractive *fI, bool *fE, UInt_t *M, UInt_t *C)
Definition NeuralNet.h:1283
std::vector< double > compute(const std::vector< double > &input, const Weights &weights) const
compute the net with the given input and the given weights
std::vector< double > container_type
Definition NeuralNet.h:1065
container_type::iterator iterator_type
Definition NeuralNet.h:1066
void preTrain(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
pre-training for future use
void fetchOutput(const LayerData &lastLayerData, OutputContainer &outputContainer) const
size_t inputSize() const
input size of the DNN
Definition NeuralNet.h:1098
std::pair< iterator_type, iterator_type > begin_end_type
Definition NeuralNet.h:1067
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition NeuralNet.h:1269
void addLayer(Layer &&layer)
Definition NeuralNet.h:1095
size_t numNodes(size_t trainingStartLayer=0) const
returns the number of nodes in this net
double train(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
start the training
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition NeuralNet.h:1245
std::vector< std::vector< LayerData > > prepareLayerData(LayerContainer &layers, Batch &batch, const DropContainer &dropContainer, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t &totalNumWeights) const
void setErrorFunction(ModeErrorFunction eErrorFunction)
which error function is to be used
Definition NeuralNet.h:1096
void initializeWeights(WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
initialize the weights with the given strategy
size_t outputSize() const
output size of the DNN
Definition NeuralNet.h:1099
double errorFunction(LayerData &layerData, Container truth, ItWeight itWeight, ItWeight itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const
computes the error of the DNN
double forward_backward(LayerContainer &layers, PassThrough &settingsAndBatch, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t trainFromLayer, OutContainer &outputContainer, bool fetchOutput) const
main NN computation function
void removeLayer()
remove one layer
Definition NeuralNet.h:1248
size_t m_sizeOutput
output size of this DNN
Definition NeuralNet.h:1271
size_t m_sizeInput
input size of this DNN
Definition NeuralNet.h:1270
double trainCycle(Minimizer &minimizer, std::vector< double > &weights, Iterator itPatternBegin, Iterator itPatternEnd, Settings &settings, DropContainer &dropContainer)
executes one training cycle
UInt_t * fIPyCurrentIter
Definition NeuralNet.h:1278
double operator()(PassThrough &settingsAndBatch, const Weights &weights) const
execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradient...
void dropOutWeightFactor(WeightsType &weights, const DropProbabilities &drops, bool inverse=false)
set the drop out configuration
void fillDropContainer(DropContainer &dropContainer, double dropFraction, size_t numNodes) const
prepare the drop-out-container (select the nodes which are to be dropped out)
void addLayer(Layer &layer)
add a layer (layout)
Definition NeuralNet.h:1094
size_t numWeights(size_t trainingStartLayer=0) const
returns the number of weights in this net
IPythonInteractive * fInteractive
Definition NeuralNet.h:1276
std::tuple< double, double > computeError(const Settings &settings, std::vector< LayerData > &lastLayerData, Batch &batch, ItWeight itWeightBegin, ItWeight itWeightEnd) const
void setOutputSize(size_t sizeOutput)
set the output size of the DNN
Definition NeuralNet.h:1093
void forwardPattern(const LayerContainer &_layers, std::vector< LayerData > &layerData) const
void backPropagate(std::vector< std::vector< LayerData > > &layerPatternData, const Settings &settings, size_t trainFromLayer, size_t totalNumWeights) const
Settings for the training of the neural net.
Definition NeuralNet.h:730
size_t m_batchSize
mini-batch size
Definition NeuralNet.h:838
void setDropOut(Iterator begin, Iterator end, size_t _dropRepetitions)
set the drop-out configuration (layer-wise)
Definition NeuralNet.h:759
void create(std::string histoName, int bins, double min, double max, int bins2, double min2, double max2)
for monitoring
Definition NeuralNet.h:820
bool useMultithreading() const
is multithreading turned on?
Definition NeuralNet.h:815
EnumRegularization regularization() const
some regularization of the DNN is turned on?
Definition NeuralNet.h:813
size_t convergenceCount() const
returns the current convergence count
Definition NeuralNet.h:827
double momentum() const
get the momentum (e.g. for SGD)
Definition NeuralNet.h:772
Timer m_timer
timer for monitoring
Definition NeuralNet.h:832
size_t testRepetitions() const
how often is the test data tested
Definition NeuralNet.h:768
void clear(std::string histoName)
for monitoring
Definition NeuralNet.h:824
virtual void endTestCycle()
callback for monitoring and logging
Definition NeuralNet.h:805
MinimizerType fMinimizerType
Definition NeuralNet.h:855
void addPoint(std::string histoName, double x, double y)
for monitoring
Definition NeuralNet.h:822
void setMonitoring(std::shared_ptr< Monitoring > ptrMonitoring)
prepared for monitoring
Definition NeuralNet.h:764
virtual void testIteration()
callback for monitoring and logging
Definition NeuralNet.h:806
size_t m_convergenceSteps
number of steps without improvement to consider the DNN to have converged
Definition NeuralNet.h:837
virtual bool hasConverged(double testError)
has this training converged already?
MinimizerType minimizerType() const
which minimizer shall be used (e.g. SGD)
Definition NeuralNet.h:774
std::vector< double > m_dropOut
Definition NeuralNet.h:850
double m_minProgress
current limits for the progress bar
Definition NeuralNet.h:833
virtual void cycle(double progress, TString text)
Definition NeuralNet.h:799
virtual void setProgressLimits(double minProgress=0, double maxProgress=100)
Definition NeuralNet.h:790
double m_maxProgress
current limits for the progress bar
Definition NeuralNet.h:834
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition NeuralNet.h:788
virtual void drawSample(const std::vector< double > &, const std::vector< double > &, const std::vector< double > &, double)
callback for monitoring and logging
Definition NeuralNet.h:807
double learningRate() const
get the learning rate
Definition NeuralNet.h:771
const std::vector< double > & dropFractions() const
Definition NeuralNet.h:762
void addPoint(std::string histoName, double x)
for monitoring
Definition NeuralNet.h:821
virtual ~Settings()
d'tor
size_t m_convergenceCount
Definition NeuralNet.h:857
EnumRegularization m_regularization
Definition NeuralNet.h:847
int repetitions() const
how many steps have to be gone until the batch is changed
Definition NeuralNet.h:773
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition NeuralNet.h:781
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition NeuralNet.h:823
virtual void startTrainCycle()
Definition NeuralNet.h:782
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition NeuralNet.h:766
double m_factorWeightDecay
Definition NeuralNet.h:840
double factorWeightDecay() const
get the weight-decay factor
Definition NeuralNet.h:769
bool exists(std::string histoName)
for monitoring
Definition NeuralNet.h:825
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition NeuralNet.h:828
void pads(int numPads)
preparation for monitoring
Definition NeuralNet.h:818
size_t batchSize() const
mini-batch size
Definition NeuralNet.h:767
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and logging
Definition NeuralNet.h:809
std::shared_ptr< Monitoring > fMonitoring
Definition NeuralNet.h:865
size_t dropRepetitions() const
Definition NeuralNet.h:761
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition NeuralNet.h:819
size_t minError() const
returns the smallest error so far
Definition NeuralNet.h:829
virtual void startTraining()
Definition NeuralNet.h:795
size_t m_maxConvergenceCount
Definition NeuralNet.h:858
virtual void startTestCycle()
callback for monitoring and logging
Definition NeuralNet.h:804
Steepest Gradient Descent algorithm (SGD)
Definition NeuralNet.h:334
double m_beta
internal parameter (momentum)
Definition NeuralNet.h:372
std::vector< double > m_localGradients
local gradients for reuse in thread.
Definition NeuralNet.h:376
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition NeuralNet.h:373
double m_alpha
internal parameter (learningRate)
Definition NeuralNet.h:371
std::vector< double > m_localWeights
local weights for reuse in thread.
Definition NeuralNet.h:375
double operator()(Function &fitnessFunction, Weights &weights, PassThrough &passThrough)
operator to call the steepest gradient descent algorithm
Steepest(double learningRate=1e-4, double momentum=0.5, size_t repetitions=10)
c'tor
Definition NeuralNet.h:348
This class is needed by JsMVA, and it's a helper class for tracking errors during the training in Jup...
Definition MethodBase.h:94
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition Timer.cxx:202
Basic string class.
Definition TString.h:136
TText * text
const Double_t sigma
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
double uniformDouble(double minValue, double maxValue)
Definition NeuralNet.cxx:43
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
ModeOutputValues operator|(ModeOutputValues lhs, ModeOutputValues rhs)
Definition NeuralNet.h:188
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
ModeOutputValues operator&=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition NeuralNet.h:204
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition Functions.h:238
ModeErrorFunction
error functions to be chosen from
Definition NeuralNet.h:1039
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max-cross-entropy error function (for mutual exclusive cross-entropy)
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition NeuralNet.h:1050
ModeOutputValues operator|=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition NeuralNet.h:193
MinimizerType
< list all the minimizer types
Definition NeuralNet.h:321
@ fSteepest
SGD.
Definition NeuralNet.h:322
double gaussDouble(double mean, double sigma)
Definition NeuralNet.cxx:35
ModeOutputValues operator&(ModeOutputValues lhs, ModeOutputValues rhs)
Definition NeuralNet.h:199
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition NeuralNet.h:1294
bool isFlagSet(T flag, T value)
Definition NeuralNet.h:212
int randomInt(int maxValue)
Definition NeuralNet.cxx:52
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
std::vector< char > DropContainer
Definition NeuralNet.h:227
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)
create variable transformations
static void output(int code)
Definition gifencode.c:226