/**
 * @file NeuralNet.h
 * @author Peter Speckmayer
 * @version 1.0
 *
 * @section LICENSE
 *
 *
 * @section DESCRIPTION Neural net implementation
 *
 * An implementation of a neural net for TMVA. This neural net uses multithreading.
 *
 */


//////////////////////////////////////////////////////////////////////////
//                                                                      //
// NeuralNet                                                            //
//                                                                      //
// A neural net implementation                                          //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#ifndef TMVA_NEURAL_NET
#define TMVA_NEURAL_NET
#pragma once

#include <map>
#include <vector>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <iterator>
#include <functional>
#include <tuple>
#include <math.h>
#include <cassert>
#include <random>
#include <thread>
#include <future>
#include <type_traits>

#include "Pattern.h"
#include "Monitoring.h"

#include "TApplication.h"
#include "Timer.h"

#include "TH1F.h"
#include "TH2F.h"
#include "TStyle.h"

#include <fenv.h> // turn on or off exceptions for NaN and other numeric exceptions


namespace TMVA
{

class IPythonInteractive;

namespace DNN
{

double gaussDouble (double mean, double sigma);
double uniformDouble (double minValue, double maxValue);
int randomInt (int maxValue);



class MeanVariance
{
public:
    MeanVariance ()
        : m_n (0)
        , m_sumWeights (0)
        , m_mean (0)
        , m_squared (0)
    {}

    inline void clear ()
    {
        m_n = 0;
        m_sumWeights = 0;
        m_mean = 0;
        m_squared = 0;
    }

    template <typename T>
    inline void add (T value, double weight = 1.0)
    {
        ++m_n; // a value has been added

        if (m_n == 1) // initialization
        {
            m_mean = value;
            m_squared = 0.0;
            m_sumWeights = weight;
            return;
        }

        // incremental update of the weighted mean and of the weighted sum of
        // squared deviations
        double tmpWeight = m_sumWeights + weight;
        double Q = value - m_mean;

        double R = Q * weight / tmpWeight;
        m_mean += R;
        m_squared += m_sumWeights * R * Q;

        m_sumWeights = tmpWeight;
    }

    template <typename ITERATOR>
    inline void add (ITERATOR itBegin, ITERATOR itEnd)
    {
        for (ITERATOR it = itBegin; it != itEnd; ++it)
            add (*it);
    }


    inline int count () const { return m_n; }
    inline double weights () const { if (m_n == 0) return 0; return m_sumWeights; }
    inline double mean () const { if (m_n == 0) return 0; return m_mean; }
    inline double var () const
    {
        if (m_n == 0)
            return 0;
        if (m_squared <= 0)
            return 0;
        return (m_squared / m_sumWeights);
    }

    inline double var_corr () const
    {
        if (m_n <= 1)
            return var ();

        return (var () * m_n / (m_n - 1)); // unbiased for small sample sizes
    }

    inline double stdDev_corr () const { return sqrt (var_corr ()); } // bias-corrected for small sample sizes
    inline double stdDev () const { return sqrt (var ()); }

private:
    size_t m_n;
    double m_sumWeights;
    double m_mean;
    double m_squared;
};
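
/* Example (illustrative sketch, not part of the original interface): this class
 * accumulates a weighted running mean and variance incrementally, without
 * storing the values. The numbers below are made up.
 *
 *     MeanVariance stat;
 *     stat.add (1.0, 2.0); // value 1.0 with weight 2.0
 *     stat.add (3.0);      // value 3.0 with default weight 1.0
 *     double m = stat.mean ();        // weighted mean: (2*1 + 1*3)/3 = 5/3
 *     double s = stat.stdDev_corr (); // bias-corrected standard deviation
 */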


enum class EnumFunction
{
    ZERO = '0',
    LINEAR = 'L',
    TANH = 'T',
    RELU = 'R',
    SYMMRELU = 'r',
    TANHSHIFT = 't',
    SIGMOID = 's',
    SOFTSIGN = 'S',
    GAUSS = 'G',
    GAUSSCOMPLEMENT = 'C'
};


enum class EnumRegularization
{
    NONE, L1, L2, L1MAX
};


enum class ModeOutputValues : int
{
    DIRECT = 0x01,
    SIGMOID = 0x02,
    SOFTMAX = 0x04,
    BATCHNORMALIZATION = 0x08
};

inline ModeOutputValues operator| (ModeOutputValues lhs, ModeOutputValues rhs)
{
    return ModeOutputValues ((int)lhs | (int)rhs);
}

inline ModeOutputValues operator|= (ModeOutputValues& lhs, ModeOutputValues rhs)
{
    lhs = ModeOutputValues ((int)lhs | (int)rhs);
    return lhs;
}

inline ModeOutputValues operator& (ModeOutputValues lhs, ModeOutputValues rhs)
{
    return ModeOutputValues ((int)lhs & (int)rhs);
}

inline ModeOutputValues operator&= (ModeOutputValues& lhs, ModeOutputValues rhs)
{
    lhs = ModeOutputValues ((int)lhs & (int)rhs);
    return lhs;
}


template <typename T>
bool isFlagSet (T flag, T value)
{
    return (int)(value & flag) != 0;
}
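
/* Example (illustrative sketch, not part of the original header): ModeOutputValues
 * is a bit-flag enum, so output modes can be combined with the operators above and
 * queried with isFlagSet.
 *
 *     ModeOutputValues mode = ModeOutputValues::SIGMOID | ModeOutputValues::BATCHNORMALIZATION;
 *     bool useSigmoid = isFlagSet (ModeOutputValues::SIGMOID, mode); // true
 *     bool useSoftmax = isFlagSet (ModeOutputValues::SOFTMAX, mode); // false
 */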


class Net;


typedef std::vector<char> DropContainer;


/*! \brief The Batch class encapsulates one mini-batch
 *
 * Holds a const_iterator to the beginning and the end of one batch in a vector of Pattern
 */
class Batch
{
public:
    typedef typename std::vector<Pattern>::const_iterator const_iterator;

    Batch (typename std::vector<Pattern>::const_iterator itBegin, typename std::vector<Pattern>::const_iterator itEnd)
        : m_itBegin (itBegin)
        , m_itEnd (itEnd)
    {}

    const_iterator begin () const { return m_itBegin; }
    const_iterator end () const { return m_itEnd; }

    size_t size () const { return std::distance (begin (), end ()); }

private:
    const_iterator m_itBegin; ///< iterator denoting the beginning of the batch
    const_iterator m_itEnd;   ///< iterator denoting the end of the batch
};
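
/* Example (illustrative sketch, not part of the original header): slicing a
 * pattern vector into one mini-batch. The Batch only references the patterns
 * via iterators; it does not copy them.
 *
 *     std::vector<Pattern> patterns; // assumed to be filled elsewhere
 *     size_t batchSize = 10;
 *     Batch batch (patterns.begin (),
 *                  patterns.begin () + std::min (batchSize, patterns.size ()));
 *     for (const Pattern& p : batch) // Batch provides begin()/end()
 *     {
 *         // process one pattern of the mini-batch
 *     }
 */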


template <typename ItSource, typename ItWeight, typename ItTarget>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd);


template <typename ItSource, typename ItWeight, typename ItPrev>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd);


template <typename ItValue, typename ItFunction>
void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction);

template <typename ItValue, typename ItFunction, typename ItInverseFunction, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, ItGradient itGradient);


template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient);


template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient,
             ItWeight itWeight, double weightDecay);


// ----- signature of a minimizer -------------
// class Minimizer
// {
// public:
//
//     template <typename Function, typename Variables, typename PassThrough>
//     double operator() (Function& fnc, Variables& vars, PassThrough& passThrough)
//     {
//         // auto itVars = begin (vars);
//         // auto itVarsEnd = end (vars);
//
//         std::vector<double> myweights;
//         std::vector<double> gradients;
//
//         double value = fnc (passThrough, myweights);
//         value = fnc (passThrough, myweights, gradients);
//         return value;
//     }
// };


/// list all the minimizer types
enum MinimizerType
{
    fSteepest ///< SGD
};


/*! \brief Steepest Gradient Descent algorithm (SGD)
 *
 * Implements a steepest gradient descent minimization algorithm
 */
class Steepest
{
public:

    size_t m_repetitions;

    /*! \brief c'tor
     *
     * C'tor
     *
     * \param learningRate denotes the learning rate for the SGD algorithm
     * \param momentum fraction of the velocity which is taken over from the last step
     * \param repetitions re-compute the gradients each "repetitions" steps
     */
    Steepest (double learningRate = 1e-4,
              double momentum = 0.5,
              size_t repetitions = 10)
        : m_repetitions (repetitions)
        , m_alpha (learningRate)
        , m_beta (momentum)
    {}

    /*! \brief operator to call the steepest gradient descent algorithm
     *
     * entry point to start the minimization procedure
     *
     * \param fitnessFunction (templated) function which has to be provided. This function is minimized
     * \param weights (templated) a reference to a container of weights. The result of the minimization procedure
     *                is returned via this reference (needs to support std::begin and std::end)
     * \param passThrough (templated) object which can hold any data which the fitness function needs. This object
     *                    is not touched by the minimizer; it is provided to the fitness function when called
     */
    template <typename Function, typename Weights, typename PassThrough>
    double operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough);


    double m_alpha; ///< internal parameter (learningRate)
    double m_beta;  ///< internal parameter (momentum)
    std::vector<double> m_prevGradients; ///< vector remembers the gradients of the previous step

    std::vector<double> m_localWeights;   ///< local weights for reuse in thread.
    std::vector<double> m_localGradients; ///< local gradients for reuse in thread.
};
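
/* Example (illustrative sketch, not part of the original header): the minimizer
 * is generic in the fitness function; Net itself serves as that function via its
 * operator(). "net", "weights" and "passThrough" are assumed to be prepared as
 * in Net::train (see the sketch near the end of this file).
 *
 *     Steepest minimizer (1e-4, 0.5, 10); // learning rate, momentum, repetitions
 *     double error = minimizer (net, weights, passThrough); // runs the SGD steps
 */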


template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);


template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);


template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
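
/* Hedged summary (not stated in the original header; exact conventions as in
 * NeuralNet.icc): with outputs o_i, truth values t_i and pattern weight w, the
 * error functions above compute per pattern approximately
 *
 *     sumOfSquares:        E = w * sum_i (o_i - t_i)^2   (up to a conventional factor 1/2)
 *     crossEntropy:        E = -w * sum_i ( t_i*log(o_i) + (1-t_i)*log(1-o_i) )
 *     softMaxCrossEntropy: E = -w * sum_i t_i*log(o_i)   (o_i softmax-normalized)
 *
 * and write the back-propagation deltas into the [itDelta, itDeltaEnd) range.
 */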


template <typename ItWeight>
double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization);
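
/* Hedged summary (exact normalization as in NeuralNet.icc): weightDecay returns
 * the given error plus a regularization penalty over the weights in
 * [itWeight, itWeightEnd), proportional to factorWeightDecay times the sum of
 * |w| for L1 or the sum of w^2 for L2; for EnumRegularization::NONE the error
 * is returned unchanged.
 */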


/*! \brief LayerData holds the data of one layer
 *
 * LayerData holds the data of one layer, but not its layout
 *
 */
class LayerData
{
public:
    typedef std::vector<double> container_type;

    typedef container_type::iterator iterator_type;
    typedef container_type::const_iterator const_iterator_type;

    typedef std::vector<std::function<double(double)> > function_container_type;
    typedef function_container_type::iterator function_iterator_type;
    typedef function_container_type::const_iterator const_function_iterator_type;

    typedef DropContainer::const_iterator const_dropout_iterator;
    /*! \brief c'tor of LayerData
     *
     * C'tor of LayerData for the input layer
     *
     * \param itInputBegin iterator to the begin of a vector which holds the values of the nodes of the neural net
     * \param itInputEnd iterator to the end of a vector which holds the values of the nodes of the neural net
     * \param eModeOutput indicates a potential transformation of the output values before further computation
     *                    DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
     *                    output value (to create a probability); SOFTMAX applies a softmax transformation to all
     *                    output values (mutually exclusive probability)
     */
    LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);

    /*! \brief c'tor of LayerData
     *
     * C'tor of LayerData for the input layer
     *
     * \param inputSize input size of this layer
     */
    LayerData (size_t inputSize);

    ~LayerData () {}

    /*! \brief c'tor of LayerData
     *
     * C'tor of LayerData for all layers which are not the input layer; Used during the training of the DNN
     *
     * \param size size of the layer
     * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
     * \param itGradientBegin indicates the start of the gradients for this layer on the gradient vector
     * \param activationFunction the activation function for this layer
     * \param inverseActivationFunction the inverse of the activation function for this layer (for back-propagation)
     * \param eModeOutput indicates a potential transformation of the output values before further computation
     *                    DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
     *                    output value (to create a probability); SOFTMAX applies a softmax transformation to all
     *                    output values (mutually exclusive probability)
     */
    LayerData (size_t size,
               const_iterator_type itWeightBegin,
               iterator_type itGradientBegin,
               std::shared_ptr<std::function<double(double)>> activationFunction,
               std::shared_ptr<std::function<double(double)>> inverseActivationFunction,
               ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);

    /*! \brief c'tor of LayerData
     *
     * C'tor of LayerData for all layers which are not the input layer; Used during the application of the DNN
     *
     * \param size size of the layer
     * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
     * \param activationFunction the activation function for this layer
     * \param eModeOutput indicates a potential transformation of the output values before further computation
     *                    DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
     *                    output value (to create a probability); SOFTMAX applies a softmax transformation to all
     *                    output values (mutually exclusive probability)
     */
    LayerData (size_t size, const_iterator_type itWeightBegin,
               std::shared_ptr<std::function<double(double)>> activationFunction,
               ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);

    /*! \brief copy c'tor of LayerData
     *
     */
    LayerData (const LayerData& other)
        : m_size (other.m_size)
        , m_itInputBegin (other.m_itInputBegin)
        , m_itInputEnd (other.m_itInputEnd)
        , m_deltas (other.m_deltas)
        , m_valueGradients (other.m_valueGradients)
        , m_values (other.m_values)
        , m_itDropOut (other.m_itDropOut)
        , m_hasDropOut (other.m_hasDropOut)
        , m_itConstWeightBegin (other.m_itConstWeightBegin)
        , m_itGradientBegin (other.m_itGradientBegin)
        , m_activationFunction (other.m_activationFunction)
        , m_inverseActivationFunction (other.m_inverseActivationFunction)
        , m_isInputLayer (other.m_isInputLayer)
        , m_hasWeights (other.m_hasWeights)
        , m_hasGradients (other.m_hasGradients)
        , m_eModeOutput (other.m_eModeOutput)
    {}

    /*! \brief move c'tor of LayerData
     *
     */
    LayerData (LayerData&& other)
        : m_size (other.m_size)
        , m_itInputBegin (other.m_itInputBegin)
        , m_itInputEnd (other.m_itInputEnd)
        , m_deltas (std::move (other.m_deltas))
        , m_valueGradients (std::move (other.m_valueGradients))
        , m_values (std::move (other.m_values))
        , m_itDropOut (other.m_itDropOut)
        , m_hasDropOut (other.m_hasDropOut)
        , m_itConstWeightBegin (other.m_itConstWeightBegin)
        , m_itGradientBegin (other.m_itGradientBegin)
        , m_activationFunction (std::move (other.m_activationFunction))
        , m_inverseActivationFunction (std::move (other.m_inverseActivationFunction))
        , m_isInputLayer (other.m_isInputLayer)
        , m_hasWeights (other.m_hasWeights)
        , m_hasGradients (other.m_hasGradients)
        , m_eModeOutput (other.m_eModeOutput)
    {}


    /*! \brief change the input iterators
     *
     * \param itInputBegin indicates the start of the input node vector
     * \param itInputEnd indicates the end of the input node vector
     */
    void setInput (const_iterator_type itInputBegin, const_iterator_type itInputEnd)
    {
        m_isInputLayer = true;
        m_itInputBegin = itInputBegin;
        m_itInputEnd = itInputEnd;
    }

    /*! \brief clear the values and the deltas
     *
     */
    void clear ()
    {
        m_values.assign (m_values.size (), 0.0);
        m_deltas.assign (m_deltas.size (), 0.0);
    }

    const_iterator_type valuesBegin () const { return m_isInputLayer ? m_itInputBegin : begin (m_values); } ///< returns const iterator to the begin of the (node) values
    const_iterator_type valuesEnd () const { return m_isInputLayer ? m_itInputEnd : end (m_values); }       ///< returns const iterator to the end of the (node) values

    iterator_type valuesBegin () { assert (!m_isInputLayer); return begin (m_values); } ///< returns iterator to the begin of the (node) values
    iterator_type valuesEnd () { assert (!m_isInputLayer); return end (m_values); }     ///< returns iterator to the end of the (node) values

    ModeOutputValues outputMode () const { return m_eModeOutput; }            ///< returns the output mode
    container_type probabilities () const { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them

    iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation)
    iterator_type deltasEnd () { return end (m_deltas); }     ///< returns iterator to the end of the deltas (back-propagation)

    const_iterator_type deltasBegin () const { return begin (m_deltas); } ///< returns const iterator to the begin of the deltas (back-propagation)
    const_iterator_type deltasEnd () const { return end (m_deltas); }     ///< returns const iterator to the end of the deltas (back-propagation)

    iterator_type valueGradientsBegin () { return begin (m_valueGradients); } ///< returns iterator to the begin of the gradients of the node values
    iterator_type valueGradientsEnd () { return end (m_valueGradients); }     ///< returns iterator to the end of the gradients of the node values

    const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const iterator to the begin of the gradients
    const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); }     ///< returns const iterator to the end of the gradients

    iterator_type gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; }             ///< returns iterator to the begin of the gradients
    const_iterator_type gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients
    const_iterator_type weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; }  ///< returns const iterator to the begin of the weights for this layer

    std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; }
    std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; }

    /*! \brief set the drop-out info for this layer
     *
     */
    template <typename Iterator>
    void setDropOut (Iterator itDrop) { m_itDropOut = itDrop; m_hasDropOut = true; }

    /*! \brief clear the drop-out-data for this layer
     *
     */
    void clearDropOut () { m_hasDropOut = false; }

    bool hasDropOut () const { return m_hasDropOut; }                                      ///< has this layer drop-out turned on?
    const_dropout_iterator dropOut () const { assert (m_hasDropOut); return m_itDropOut; } ///< return the begin of the drop-out information

    size_t size () const { return m_size; } ///< return the size of the layer

private:

    /*! \brief compute the probabilities from the node values
     *
     */
    container_type computeProbabilities () const;

private:

    size_t m_size; ///< layer size

    const_iterator_type m_itInputBegin; ///< iterator to the first of the nodes in the input node vector
    const_iterator_type m_itInputEnd;   ///< iterator to the end of the nodes in the input node vector

    std::vector<double> m_deltas;         ///< stores the deltas for the DNN training
    std::vector<double> m_valueGradients; ///< stores the gradients of the values (nodes)
    std::vector<double> m_values;         ///< stores the values of the nodes in this layer
    const_dropout_iterator m_itDropOut;   ///< iterator to a container indicating if the corresponding node is to be dropped
    bool m_hasDropOut;                    ///< is drop-out turned on?

    const_iterator_type m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector
    iterator_type m_itGradientBegin;          ///< iterator to the first gradient of this layer in the gradient vector

    std::shared_ptr<std::function<double(double)>> m_activationFunction;        ///< activation function for this layer
    std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< inverse activation function for this layer

    bool m_isInputLayer; ///< is this layer an input layer
    bool m_hasWeights;   ///< does this layer have weights (it does not if it is the input layer)
    bool m_hasGradients; ///< does this layer have gradients (only if in training mode)

    ModeOutputValues m_eModeOutput; ///< stores the output mode (DIRECT, SIGMOID, SOFTMAX)

};


/*! \brief Layer defines the layout of a layer
 *
 * Layer defines the layout of a specific layer in the DNN
 * Objects of this class don't hold the layer data itself (see class "LayerData")
 *
 */
class Layer
{
public:

    /*! \brief c'tor for defining a Layer
     *
     * \param numNodes number of nodes of this layer
     * \param activationFunction type of the activation function for this layer
     * \param eModeOutputValues indicates a potential transformation of the output values (e.g. to probabilities)
     */
    Layer (size_t numNodes, EnumFunction activationFunction, ModeOutputValues eModeOutputValues = ModeOutputValues::DIRECT);

    ModeOutputValues modeOutputValues () const { return m_eModeOutputValues; }                       ///< get the mode-output-value (direct, probabilities)
    void modeOutputValues (ModeOutputValues eModeOutputValues) { m_eModeOutputValues = eModeOutputValues; } ///< set the mode-output-value

    size_t numNodes () const { return m_numNodes; }                                          ///< return the number of nodes of this layer
    size_t numWeights (size_t numInputNodes) const { return numInputNodes * numNodes (); }   ///< return the number of weights for this layer (fully connected)

    std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; }               ///< fetch the activation function for this layer
    std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; } ///< fetch the inverse activation function for this layer

    EnumFunction activationFunctionType () const { return m_activationFunctionType; } ///< get the activation function type for this layer

private:

    std::shared_ptr<std::function<double(double)>> m_activationFunction;        ///< stores the activation function
    std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< stores the inverse activation function

    size_t m_numNodes;

    ModeOutputValues m_eModeOutputValues; ///< do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned as such
    EnumFunction m_activationFunctionType;

    friend class Net;
};
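
/* Example (illustrative sketch, not part of the original header): defining layer
 * layouts. A Layer only describes the layout; the node values themselves live in
 * LayerData.
 *
 *     Layer hidden (20, EnumFunction::TANH); // 20 nodes with tanh activation
 *     Layer output (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID); // sigmoid-transformed output
 */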


template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData);


template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData);


template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization);


/*! \brief Settings for the training of the neural net
 *
 */
class Settings
{
public:

    /*! \brief c'tor
     *
     */
    Settings (TString name,
              size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
              double _factorWeightDecay = 1e-5, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE,
              MinimizerType _eMinimizerType = MinimizerType::fSteepest,
              double _learningRate = 1e-5, double _momentum = 0.3,
              int _repetitions = 3,
              bool _multithreading = true);

    /*! \brief d'tor
     *
     */
    virtual ~Settings ();


    /*! \brief set the drop-out configuration (layer-wise)
     *
     * \param begin begin of an array or vector denoting the drop-out probabilities for each layer
     * \param end end of an array or vector denoting the drop-out probabilities for each layer
     * \param _dropRepetitions denotes after how many repetitions the drop-out setting (which nodes are dropped out exactly) is changed
     */
    template <typename Iterator>
    void setDropOut (Iterator begin, Iterator end, size_t _dropRepetitions) { m_dropOut.assign (begin, end); m_dropRepetitions = _dropRepetitions; }

    size_t dropRepetitions () const { return m_dropRepetitions; }
    const std::vector<double>& dropFractions () const { return m_dropOut; }

    void setMonitoring (std::shared_ptr<Monitoring> ptrMonitoring) { fMonitoring = ptrMonitoring; } ///< prepared for monitoring

    size_t convergenceSteps () const { return m_convergenceSteps; }   ///< how many steps until training is deemed to have converged
    size_t batchSize () const { return m_batchSize; }                 ///< mini-batch size
    size_t testRepetitions () const { return m_testRepetitions; }     ///< how often is the test data tested
    double factorWeightDecay () const { return m_factorWeightDecay; } ///< get the weight-decay factor

    double learningRate () const { return fLearningRate; }          ///< get the learning rate
    double momentum () const { return fMomentum; }                  ///< get the momentum (e.g. for SGD)
    int repetitions () const { return fRepetitions; }               ///< how many steps have to be gone until the batch is changed
    MinimizerType minimizerType () const { return fMinimizerType; } ///< which minimizer shall be used (e.g. SGD)


    virtual void testSample (double /*error*/, double /*output*/, double /*target*/, double /*weight*/) {} ///< virtual function to be used for monitoring (callback)
    virtual void startTrainCycle () ///< callback for monitoring and logging
    {
        m_convergenceCount = 0;
        m_maxConvergenceCount = 0;
        m_minError = 1e10;
    }
    virtual void endTrainCycle (double /*error*/) {} ///< callback for monitoring and logging

    virtual void setProgressLimits (double minProgress = 0, double maxProgress = 100) ///< for monitoring and logging (set the current "progress" limits for the display of the progress)
    {
        m_minProgress = minProgress;
        m_maxProgress = maxProgress;
    }
    virtual void startTraining () ///< start drawing the progress bar
    {
        m_timer.DrawProgressBar (Int_t(m_minProgress));
    }
    virtual void cycle (double progress, TString text) ///< advance on the progress bar
    {
        m_timer.DrawProgressBar (Int_t(m_minProgress + (m_maxProgress - m_minProgress) * (progress / 100.0)), text);
    }

    virtual void startTestCycle () {} ///< callback for monitoring and logging
    virtual void endTestCycle () {}   ///< callback for monitoring and logging
    virtual void testIteration () {}  ///< callback for monitoring and logging
    virtual void drawSample (const std::vector<double>& /*input*/, const std::vector<double>& /* output */, const std::vector<double>& /* target */, double /* patternWeight */) {} ///< callback for monitoring and logging

    virtual void computeResult (const Net& /* net */, std::vector<double>& /* weights */) {} ///< callback for monitoring and logging

    virtual bool hasConverged (double testError); ///< has this training converged already?

    EnumRegularization regularization () const { return m_regularization; } ///< is regularization of the DNN turned on?

    bool useMultithreading () const { return m_useMultithreading; } ///< is multithreading turned on?


    void pads (int numPads) { if (fMonitoring) fMonitoring->pads (numPads); } ///< preparation for monitoring
    void create (std::string histoName, int bins, double min, double max) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max); } ///< for monitoring
    void create (std::string histoName, int bins, double min, double max, int bins2, double min2, double max2) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max, bins2, min2, max2); } ///< for monitoring
    void addPoint (std::string histoName, double x) { if (fMonitoring) fMonitoring->addPoint (histoName, x); } ///< for monitoring
    void addPoint (std::string histoName, double x, double y) { if (fMonitoring) fMonitoring->addPoint (histoName, x, y); } ///< for monitoring
    void plot (std::string histoName, std::string options, int pad, EColor color) { if (fMonitoring) fMonitoring->plot (histoName, options, pad, color); } ///< for monitoring
    void clear (std::string histoName) { if (fMonitoring) fMonitoring->clear (histoName); } ///< for monitoring
    bool exists (std::string histoName) { if (fMonitoring) return fMonitoring->exists (histoName); return false; } ///< for monitoring

    size_t convergenceCount () const { return m_convergenceCount; }       ///< returns the current convergence count
    size_t maxConvergenceCount () const { return m_maxConvergenceCount; } ///< returns the max convergence count so far
    double minError () const { return m_minError; }                       ///< returns the smallest error so far

public:
    Timer m_timer;        ///< timer for monitoring
    double m_minProgress; ///< current limits for the progress bar
    double m_maxProgress; ///< current limits for the progress bar


    size_t m_convergenceSteps; ///< number of steps without improvement to consider the DNN to have converged
    size_t m_batchSize;        ///< mini-batch size
    size_t m_testRepetitions;
    double m_factorWeightDecay;

    size_t count_E;
    size_t count_dE;
    size_t count_mb_E;
    size_t count_mb_dE;

    EnumRegularization m_regularization;

    double m_dropRepetitions;
    std::vector<double> m_dropOut;

    double fLearningRate;
    double fMomentum;
    int fRepetitions;
    MinimizerType fMinimizerType;

    size_t m_convergenceCount;
    size_t m_maxConvergenceCount;
    double m_minError;


protected:
    bool m_useMultithreading;

    std::shared_ptr<Monitoring> fMonitoring;
};
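
/* Example (illustrative sketch, not part of the original header): bundling the
 * training hyper-parameters; the name string is made up.
 *
 *     Settings settings (TString ("example"),
 *                        10,   // convergence steps
 *                        30,   // batch size
 *                        7,    // test repetitions
 *                        1e-5, // weight-decay factor
 *                        TMVA::DNN::EnumRegularization::L2,
 *                        MinimizerType::fSteepest,
 *                        1e-5, // learning rate
 *                        0.3,  // momentum
 *                        3);   // repetitions
 */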



/*! \brief Settings for classification
 *
 * contains additional settings if the DNN problem is classification
 */
class ClassificationSettings : public Settings
{
public:
    /*! \brief c'tor
     *
     */
    ClassificationSettings (TString name,
                            size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
                            double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE,
                            size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest,
                            double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3,
                            bool _useMultithreading = true)
        : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay,
                    _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading)
        , m_ams ()
        , m_sumOfSigWeights (0)
        , m_sumOfBkgWeights (0)
        , m_scaleToNumEvents (_scaleToNumEvents)
        , m_cutValue (10.0)
        , m_pResultPatternContainer (NULL)
        , m_fileNameResult ()
        , m_fileNameNetConfig ()
    {
    }

    /*! \brief d'tor
     *
     */
    virtual ~ClassificationSettings ()
    {
    }

    void startTrainCycle ();
    void endTrainCycle (double /*error*/);
    void testIteration () { if (fMonitoring) fMonitoring->ProcessEvents (); }

    /* void createHistograms () */
    /* { */
    /*     std::cout << "is hist ROC existing?" << std::endl; */
    /*     if (m_histROC) */
    /*     { */
    /*         std::cout << "--> yes" << std::endl; */
    /*         fMonitoring->ProcessEvents (); */
    /*         return; */
    /*     } */

    /*     std::cout << "create histograms" << std::endl; */
    /*     TCanvas* canvas = fMonitoring->GetCanvas (); */
    /*     if (canvas) */
    /*     { */
    /*         std::cout << "canvas divide" << std::endl; */
    /*         canvas->cd (); */
    /*         canvas->Divide (2,2); */
    /*     } */
    /*     if (!m_histROC) */
    /*     { */
    /*         m_histROC = new TH2F ("ROC","ROC", 1000, 0, 1.0, 1000, 0, 1.0); m_histROC->SetDirectory (0); */
    /*         m_histROC->SetLineColor (kBlue); */
    /*     } */
    /*     if (!m_histSignificance) */
    /*     { */
    /*         m_histSignificance = new TH2F ("Significance", "Significance", 1000, 0, 1.0, 5, 0.0, 2.0); */
    /*         m_histSignificance->SetDirectory (0); */
    /*         m_histSignificance->SetBit (TH1::kCanRebin); */
    /*         m_histROC->SetLineColor (kRed); */
    /*     } */
    /*     if (!m_histError) */
    /*     { */
    /*         m_histError = new TH1F ("Error", "Error", 100, 0, 100); */
    /*         m_histError->SetDirectory (0); */
    /*         m_histError->SetBit (TH1::kCanRebin); */
    /*         m_histROC->SetLineColor (kGreen); */
    /*     } */
    /*     if (!m_histOutputSignal) */
    /*     { */
    /*         m_histOutputSignal = new TH1F ("Signal", "Signal", 100, 0, 1.0); */
    /*         m_histOutputSignal->SetDirectory (0); */
    /*         m_histOutputSignal->SetBit (TH1::kCanRebin); */
    /*     } */
    /*     if (!m_histOutputBackground) */
    /*     { */
    /*         m_histOutputBackground = new TH1F ("Background", "Background", 100, 0, 1.0); */
    /*         m_histOutputBackground->SetDirectory (0); */
    /*         m_histOutputBackground->SetBit (TH1::kCanRebin); */
    /*     } */

    /*     fMonitoring->ProcessEvents (); */
    /* } */

    void testSample (double error, double output, double target, double weight);

    virtual void startTestCycle ();
    virtual void endTestCycle ();


    void setWeightSums (double sumOfSigWeights, double sumOfBkgWeights);
    void setResultComputation (std::string _fileNameNetConfig, std::string _fileNameResult, std::vector<Pattern>* _resultPatternContainer);

    std::vector<double> m_input;
    std::vector<double> m_output;
    std::vector<double> m_targets;
    std::vector<double> m_weights;

    std::vector<double> m_ams;
    std::vector<double> m_significances;

    double m_sumOfSigWeights;
    double m_sumOfBkgWeights;
    size_t m_scaleToNumEvents;

    double m_cutValue;
    std::vector<Pattern>* m_pResultPatternContainer;
    std::string m_fileNameResult;
    std::string m_fileNameNetConfig;


    /* TH2F* m_histROC; */
    /* TH2F* m_histSignificance; */

    /* TH1F* m_histError; */
    /* TH1F* m_histOutputSignal; */
    /* TH1F* m_histOutputBackground; */
};


/// used to distinguish between different function signatures
enum class ModeOutput
{
    FETCH
};

/*! \brief error functions to be chosen from
 *
 */
enum class ModeErrorFunction
{
    SUMOFSQUARES = 'S',
    CROSSENTROPY = 'C',
    CROSSENTROPY_MUTUALEXCLUSIVE = 'M'
};

/*! \brief weight initialization strategies to be chosen from
 *
 */
enum class WeightInitializationStrategy
{
    XAVIER, TEST, LAYERSIZE, XAVIERUNIFORM
};


/*! \brief neural net
 *
 * holds the structure of all layers and some data for the whole net
 * does not know the layer data though (i.e. values of the nodes and weights)
 */
class Net
{
public:

    typedef std::vector<double> container_type;
    typedef container_type::iterator iterator_type;
    typedef std::pair<iterator_type,iterator_type> begin_end_type;


    /*! \brief c'tor
     *
     */
    Net ()
        : m_eErrorFunction (ModeErrorFunction::SUMOFSQUARES)
        , m_sizeInput (0)
        , m_sizeOutput (0)
        , m_layers ()
    {
    }

    /*! \brief copy c'tor
     *
     */
    Net (const Net& other)
        : m_eErrorFunction (other.m_eErrorFunction)
        , m_sizeInput (other.m_sizeInput)
        , m_sizeOutput (other.m_sizeOutput)
        , m_layers (other.m_layers)
    {
    }

    void setInputSize (size_t sizeInput) { m_sizeInput = sizeInput; }     ///< set the input size of the DNN
    void setOutputSize (size_t sizeOutput) { m_sizeOutput = sizeOutput; } ///< set the output size of the DNN
    void addLayer (Layer& layer) { m_layers.push_back (layer); }          ///< add a layer (layout)
    void addLayer (Layer&& layer) { m_layers.push_back (std::move (layer)); }
    void setErrorFunction (ModeErrorFunction eErrorFunction) { m_eErrorFunction = eErrorFunction; } ///< which error function is to be used

    size_t inputSize () const { return m_sizeInput; }   ///< input size of the DNN
    size_t outputSize () const { return m_sizeOutput; } ///< output size of the DNN

    /*! \brief set the drop-out configuration
     *
     */
    template <typename WeightsType, typename DropProbabilities>
    void dropOutWeightFactor (WeightsType& weights,
                              const DropProbabilities& drops,
                              bool inverse = false);

    /*! \brief start the training
     *
     * \param weights weight vector
     * \param trainPattern training pattern
     * \param testPattern test pattern
     * \param minimizer use this minimizer for training (e.g. SGD)
     * \param settings settings used for this training run
     */
    template <typename Minimizer>
    double train (std::vector<double>& weights,
                  std::vector<Pattern>& trainPattern,
                  const std::vector<Pattern>& testPattern,
                  Minimizer& minimizer,
                  Settings& settings);

    /*! \brief pre-training for future use
     *
     */
    template <typename Minimizer>
    void preTrain (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer, Settings& settings);


    /*! \brief executes one training cycle
     *
     * \param minimizer the minimizer to be used
     * \param weights the weight vector to be used
     * \param itPatternBegin the pattern to be trained with
     * \param itPatternEnd the pattern to be trained with
     * \param settings the settings for the training
     * \param dropContainer the configuration for DNN drop-out
     */
    template <typename Iterator, typename Minimizer>
    inline double trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                              Iterator itPatternBegin, Iterator itPatternEnd,
                              Settings& settings,
                              DropContainer& dropContainer);

    size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net
    size_t numNodes (size_t trainingStartLayer = 0) const;   ///< returns the number of nodes in this net

    template <typename Weights>
    std::vector<double> compute (const std::vector<double>& input, const Weights& weights) const; ///< compute the net with the given input and the given weights

    template <typename Weights, typename PassThrough>
    double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients

    template <typename Weights, typename PassThrough, typename OutContainer>
    double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function

    template <typename Weights, typename Gradients, typename PassThrough>
    double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well

    template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
    double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const;


    template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
    std::vector<std::vector<LayerData>> prepareLayerData (LayerContainer& layers,
                                                          Batch& batch,
                                                          const DropContainer& dropContainer,
                                                          ItWeight itWeightBegin,
                                                          ItWeight itWeightEnd,
                                                          ItGradient itGradientBegin,
                                                          ItGradient itGradientEnd,
                                                          size_t& totalNumWeights) const;

    template <typename LayerContainer>
    void forwardPattern (const LayerContainer& _layers,
                         std::vector<LayerData>& layerData) const;


    template <typename LayerContainer, typename LayerPatternContainer>
    void forwardBatch (const LayerContainer& _layers,
                       LayerPatternContainer& layerPatternData,
                       std::vector<double>& valuesMean,
                       std::vector<double>& valuesStdDev,
                       size_t trainFromLayer) const;

    template <typename OutputContainer>
    void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const;

    template <typename OutputContainer>
    void fetchOutput (const std::vector<LayerData>& layerPatternData, OutputContainer& outputContainer) const;


    template <typename ItWeight>
    std::tuple</*sumError*/double, /*sumWeights*/double> computeError (const Settings& settings,
                                                                       std::vector<LayerData>& lastLayerData,
                                                                       Batch& batch,
                                                                       ItWeight itWeightBegin,
                                                                       ItWeight itWeightEnd) const;

    template <typename Settings>
    void backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                        const Settings& settings,
                        size_t trainFromLayer,
                        size_t totalNumWeights) const;


    /*! \brief main NN computation function
     *
     */
    template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
    double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch,
                             ItWeight itWeightBegin, ItWeight itWeightEnd,
                             ItGradient itGradientBegin, ItGradient itGradientEnd,
                             size_t trainFromLayer,
                             OutContainer& outputContainer, bool fetchOutput) const;


    double E ();
    void dE ();


    /*! \brief computes the error of the DNN
     *
     */
    template <typename Container, typename ItWeight>
    double errorFunction (LayerData& layerData,
                          Container truth,
                          ItWeight itWeight,
                          ItWeight itWeightEnd,
                          double patternWeight,
                          double factorWeightDecay,
                          EnumRegularization eRegularization) const;


    const std::vector<Layer>& layers () const { return m_layers; } ///< returns the layers (structure)
    std::vector<Layer>& layers () { return m_layers; }             ///< returns the layers (structure)

    void removeLayer () { m_layers.pop_back (); } ///< remove one layer


    void clear () ///< clear the net (remove all layers and reset the error function)
    {
        m_layers.clear ();
        m_eErrorFunction = ModeErrorFunction::SUMOFSQUARES;
    }


    template <typename OutIterator>
    void initializeWeights (WeightInitializationStrategy eInitStrategy,
                            OutIterator itWeight); ///< initialize the weights with the given strategy

protected:

    void fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const; ///< prepare the drop-out-container (select the nodes which are to be dropped out)


private:

    ModeErrorFunction m_eErrorFunction; ///< denotes the error function
    size_t m_sizeInput;                 ///< input size of this DNN
    size_t m_sizeOutput;                ///< output size of this DNN
    std::vector<Layer> m_layers;        ///< layer-structure-data

protected:
    // variables for JsMVA (interactive training in jupyter notebook)
    IPythonInteractive* fInteractive = nullptr;
    bool* fExitFromTraining = nullptr;
    UInt_t *fIPyMaxIter = nullptr, *fIPyCurrentIter = nullptr;

public:

    // setup ipython interactive variables
    void SetIpythonInteractive (IPythonInteractive* fI, bool* fE, UInt_t* M, UInt_t* C)
    {
        fInteractive = fI;
        fExitFromTraining = fE;
        fIPyMaxIter = M;
        fIPyCurrentIter = C;
    }
};
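
/* Example (illustrative end-to-end sketch, not part of the original header):
 * building a small classification net and training it with SGD. "trainPatterns"
 * and "testPatterns" are assumed to be filled std::vector<Pattern> containers.
 *
 *     Net net;
 *     net.setInputSize (4);
 *     net.setOutputSize (1);
 *     net.addLayer (Layer (8, EnumFunction::TANH));
 *     net.addLayer (Layer (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID));
 *     net.setErrorFunction (ModeErrorFunction::CROSSENTROPY);
 *
 *     std::vector<double> weights;
 *     weights.reserve (net.numWeights ());
 *     net.initializeWeights (WeightInitializationStrategy::XAVIER,
 *                            std::back_inserter (weights));
 *
 *     Steepest minimizer (1e-4, 0.5, 10);
 *     Settings settings (TString ("example"));
 *     double testError = net.train (weights, trainPatterns, testPatterns,
 *                                   minimizer, settings);
 */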


typedef std::tuple<Settings&, Batch&, DropContainer&> pass_through_type;
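
// pass_through_type bundles the settings, the current mini-batch and the drop-out
// configuration; it is handed through the minimizer untouched and unpacked again
// by Net::operator() (see NeuralNet.icc).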


} // namespace DNN
} // namespace TMVA


// include the implementations (in header file, because they are templated)
#include "TMVA/NeuralNet.icc"

#endif
void addPoint(std::string histoName, double x)
for monitoring
Definition: NeuralNet.h:828
void testIteration()
callback for monitoring and loggging
Definition: NeuralNet.h:937
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition: NeuralNet.h:612
virtual void startTraining()
Definition: NeuralNet.h:802
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition: NeuralNet.h:1276
const_iterator_type weightsBegin() const
returns const iterator to the begin of the weights for this layer
Definition: NeuralNet.h:609
void setInput(const_iterator_type itInputBegin, const_iterator_type itInputEnd)
change the input iterators
Definition: NeuralNet.h:569
void addLayer(Layer &&layer)
Definition: NeuralNet.h:1102
size_t convergenceCount() const
returns the current convergence count
Definition: NeuralNet.h:834
virtual ~ClassificationSettings()
d&#39;tor
Definition: NeuralNet.h:931
virtual void cycle(double progress, TString text)
Definition: NeuralNet.h:806
MinimizerType
< list all the minimizer types
Definition: NeuralNet.h:321
void add(ITERATOR itBegin, ITERATOR itEnd)
Definition: NeuralNet.h:117
double var_corr() const
Definition: NeuralNet.h:137
size_t m_sizeInput
input size of this DNN
Definition: NeuralNet.h:1277
std::vector< Layer > & layers()
returns the layers (structure)
Definition: NeuralNet.h:1253
void create(std::string histoName, int bins, double min, double max, int bins2, double min2, double max2)
for monitoring
Definition: NeuralNet.h:827
std::vector< char > DropContainer
Definition: NeuralNet.h:220
void setDropOut(Iterator begin, Iterator end, size_t _dropRepetitions)
set the drop-out configuration (layer-wise)
Definition: NeuralNet.h:766
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
stores the inverse activation function
Definition: NeuralNet.h:704
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:595
bool isFlagSet(T flag, T value)
Definition: NeuralNet.h:213
double m_maxProgress
current limits for the progress bar
Definition: NeuralNet.h:841
Steepest Gradient Descent algorithm (SGD)
Definition: NeuralNet.h:334
ModeOutputValues modeOutputValues() const
get the mode-output-value (direct, probabilities)
Definition: NeuralNet.h:689
double T(double x)
Definition: ChebyshevPol.h:34
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition: NeuralNet.h:773
std::pair< iterator_type, iterator_type > begin_end_type
Definition: NeuralNet.h:1074
double m_dropRepetitions
Definition: NeuralNet.h:856
std::vector< std::function< double(double)> > function_container_type
Definition: NeuralNet.h:445
void add(T value, double weight=1.0)
Definition: NeuralNet.h:94
const std::vector< double > & dropFractions() const
Definition: NeuralNet.h:769
Net()
c&#39;tor
Definition: NeuralNet.h:1081
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
Definition: NeuralNet.icc:570
size_t inputSize() const
input size of the DNN
Definition: NeuralNet.h:1105
void setErrorFunction(ModeErrorFunction eErrorFunction)
which error function is to be used
Definition: NeuralNet.h:1103
iterator_type valueGradientsEnd()
returns iterator to the end of the gradients of the node values
Definition: NeuralNet.h:602
std::shared_ptr< Monitoring > fMonitoring
Definition: NeuralNet.h:872
EnumRegularization regularization() const
some regularization of the DNN is turned on?
Definition: NeuralNet.h:820
Basic string class.
Definition: TString.h:129
bool useMultithreading() const
is multithreading turned on?
Definition: NeuralNet.h:822
int Int_t
Definition: RtypesCore.h:41
function_container_type::iterator function_iterator_type
Definition: NeuralNet.h:446
ModeOutputValues operator|(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:189
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition: NeuralNet.h:830
const_iterator_type gradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:608
iterator_type m_itGradientBegin
iterator to the first gradient of this layer in the gradient vector
Definition: NeuralNet.h:653
STL namespace.
bool m_hasGradients
does this layer have gradients (only if in training mode)
Definition: NeuralNet.h:660
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
Definition: NeuralNet.icc:181
#define NULL
Definition: RtypesCore.h:88
ModeOutputValues operator &(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:200
ModeOutputValues m_eModeOutput
stores the output mode (DIRECT, SIGMOID, SOFTMAX)
Definition: NeuralNet.h:662
size_t numWeights(size_t numInputNodes) const
return the number of weights for this layer (fully connected)
Definition: NeuralNet.h:693
void setOutputSize(size_t sizeOutput)
set the output size of the DNN
Definition: NeuralNet.h:1100
virtual void testIteration()
callback for monitoring and loggging
Definition: NeuralNet.h:813
bool m_isInputLayer
is this layer an input layer
Definition: NeuralNet.h:658
double momentum() const
get the momentum (e.g. for SGD)
Definition: NeuralNet.h:779
TRObject operator()(const T1 &t1) const
EnumFunction m_activationFunctionType
Definition: NeuralNet.h:710
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
iterator_type valuesEnd()
returns iterator to the end of the (node) values
Definition: NeuralNet.h:590
neural net
Definition: NeuralNet.h:1068
std::shared_ptr< std::function< double(double)> > activationFunction() const
fetch the activation function for this layer
Definition: NeuralNet.h:695
double learningRate() const
get the learning rate
Definition: NeuralNet.h:778
size_t m_sizeOutput
outut size of this DNN
Definition: NeuralNet.h:1278
size_t m_numNodes
Definition: NeuralNet.h:707
std::vector< double > m_valueGradients
stores the gradients of the values (nodes)
Definition: NeuralNet.h:647
std::vector< double > m_significances
Definition: NeuralNet.h:1008
virtual void startTestCycle()
callback for monitoring and loggging
Definition: NeuralNet.h:811
size_t m_convergenceSteps
number of steps without improvement to consider the DNN to have converged
Definition: NeuralNet.h:844
double sqrt(double)
container_type::const_iterator const_iterator_type
Definition: NeuralNet.h:443
MinimizerType fMinimizerType
Definition: NeuralNet.h:862
Double_t x[n]
Definition: legend1.C:17
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496
std::vector< double > m_dropOut
Definition: NeuralNet.h:857
const_iterator_type valueGradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:604
std::vector< Layer > m_layers
layer-structure-data
Definition: NeuralNet.h:1279
ModeOutputValues outputMode() const
returns the output mode
Definition: NeuralNet.h:592
void SetIpythonInteractive(IPythonInteractive *fI, bool *fE, UInt_t *M, UInt_t *C)
Definition: NeuralNet.h:1290
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition: NeuralNet.h:611
size_t m_batchSize
mini-batch size
Definition: NeuralNet.h:845
const_iterator_type m_itConstWeightBegin
const iterator to the first weight of this layer in the weight vector
Definition: NeuralNet.h:652
function_container_type::const_iterator const_function_iterator_type
Definition: NeuralNet.h:447
ModeOutputValues m_eModeOutputValues
do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned...
Definition: NeuralNet.h:709
void removeLayer()
remove one layer
Definition: NeuralNet.h:1255
bool hasDropOut() const
has this layer drop-out turned on?
Definition: NeuralNet.h:626
void setInputSize(size_t sizeInput)
set the input size of the DNN
Definition: NeuralNet.h:1099
const_iterator_type m_itInputBegin
iterator to the first of the nodes in the input node vector
Definition: NeuralNet.h:643
bool m_hasDropOut
dropOut is turned on?
Definition: NeuralNet.h:650
ClassificationSettings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, EnumRegularization _regularization=EnumRegularization::NONE, size_t _scaleToNumEvents=0, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _useMultithreading=true)
c&#39;tor
Definition: NeuralNet.h:908
const_iterator_type valueGradientsEnd() const
returns const iterator to the end of the gradients
Definition: NeuralNet.h:605
const_iterator end() const
Definition: NeuralNet.h:246
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition: NeuralNet.h:374
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
const Double_t sigma
double stdDev_corr() const
Definition: NeuralNet.h:145
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition: NeuralNet.h:826
double var() const
Definition: NeuralNet.h:128
EnumFunction activationFunctionType() const
get the activation function type for this layer
Definition: NeuralNet.h:698
std::vector< double > m_output
Definition: NeuralNet.h:1003
EColor
Definition: Rtypes.h:55
size_t testRepetitions() const
how often is the test data tested
Definition: NeuralNet.h:775
iterator_type valuesBegin()
returns iterator to the begin of the (node) values
Definition: NeuralNet.h:589
double stdDev() const
Definition: NeuralNet.h:146
void setMonitoring(std::shared_ptr< Monitoring > ptrMonitoring)
prepared for monitoring
Definition: NeuralNet.h:771
std::vector< Pattern > * m_pResultPatternContainer
Definition: NeuralNet.h:1016
std::vector< double > m_deltas
stores the deltas for the DNN training
Definition: NeuralNet.h:646
double m_minProgress
current limits for the progress bar
Definition: NeuralNet.h:840
iterator_type gradientsBegin()
returns iterator to the begin of the gradients
Definition: NeuralNet.h:607
double weights() const
Definition: NeuralNet.h:126
size_t dropRepetitions() const
Definition: NeuralNet.h:768
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition: NeuralNet.h:593
static double C[]
Double_t(* Function)(Double_t)
Definition: Functor.C:4
size_t m_testRepetitions
Definition: NeuralNet.h:846
container_type::iterator iterator_type
Definition: NeuralNet.h:442
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition: NeuralNet.h:1252
std::vector< Pattern >::const_iterator const_iterator
Definition: NeuralNet.h:238
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition: NeuralNet.h:795
double factorWeightDecay() const
get the weight-decay factor
Definition: NeuralNet.h:776
RooCmdArg Minimizer(const char *type, const char *alg=0)
ModeOutputValues operator &=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:205
LayerData(LayerData &&other)
move c&#39;tor of LayerData
Definition: NeuralNet.h:542
size_t m_convergenceCount
Definition: NeuralNet.h:864
std::vector< double > container_type
Definition: NeuralNet.h:440
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max cross-entropy error function (for mutually exclusive classes)
Definition: NeuralNet.icc:456
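For orientation, the textbook form of this error function (the function receives the soft-max probabilities p_k already computed, through the iterator range): with truth values t_k,

    E = -\sum_k t_k \ln p_k ,    \delta_k = p_k - t_k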
Timer m_timer
timer for monitoring
Definition: NeuralNet.h:839
size_t minError() const
returns the smallest error so far
Definition: NeuralNet.h:836
size_t numNodes() const
return the number of nodes of this layer
Definition: NeuralNet.h:692
double m_factorWeightDecay
Definition: NeuralNet.h:847
LayerData(const LayerData &other)
copy c'tor of LayerData
Definition: NeuralNet.h:519
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:213
void modeOutputValues(ModeOutputValues eModeOutputValues)
set the mode-output-value
Definition: NeuralNet.h:690
void pads(int numPads)
preparation for monitoring
Definition: NeuralNet.h:825
Settings for the training of the neural net.
Definition: NeuralNet.h:736
virtual void startTrainCycle()
Definition: NeuralNet.h:789
virtual void drawSample(const std::vector< double > &, const std::vector< double > &, const std::vector< double > &, double)
callback for monitoring and logging
Definition: NeuralNet.h:814
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
fetch the inverse activation function for this layer
Definition: NeuralNet.h:696
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition: NeuralNet.h:1056
ModeErrorFunction
error functions to be chosen from
Definition: NeuralNet.h:1045
std::vector< double > m_localGradients
local gradients for reuse within a thread.
Definition: NeuralNet.h:377
Layer defines the layout of a layer.
Definition: NeuralNet.h:676
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)
const_dropout_iterator m_itDropOut
iterator to a container indicating if the corresponding node is to be dropped
Definition: NeuralNet.h:649
Steepest(double learningRate=1e-4, double momentum=0.5, size_t repetitions=10)
c'tor
Definition: NeuralNet.h:349
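A construction sketch with the defaults of the signature above spelled out (values illustrative):

    TMVA::DNN::Steepest minimizer (/*learningRate*/ 1e-4,
                                   /*momentum*/     0.5,
                                   /*repetitions*/  10);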
bool m_hasWeights
does this layer have weights (it does not if it is the input layer)
Definition: NeuralNet.h:659
std::vector< double > m_ams
Definition: NeuralNet.h:1007
EnumRegularization m_regularization
Definition: NeuralNet.h:854
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition: NeuralNet.h:587
void clearDropOut()
clear the drop-out-data for this layer
Definition: NeuralNet.h:624
ModeOutputValues
Definition: NeuralNet.h:179
double gaussDouble(double mean, double sigma)
Definition: NeuralNet.cxx:14
MinimizerType minimizerType() const
which minimizer shall be used (e.g. SGD)
Definition: NeuralNet.h:781
The Batch class encapsulates one mini-batch.
Definition: NeuralNet.h:235
double m_beta
internal parameter (momentum)
Definition: NeuralNet.h:373
double mean() const
Definition: NeuralNet.h:127
container_type::iterator iterator_type
Definition: NeuralNet.h:1073
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition: NeuralNet.h:835
std::vector< double > m_targets
Definition: NeuralNet.h:1004
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
Definition: NeuralNet.icc:544
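A self-contained sketch of the same idea for one plain dense layer (an illustration, not the TMVA code; the row-major weight layout and the tanh activation are assumptions):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // one forward step: curr[j] = tanh( sum_i w[j*nPrev+i] * prev[i] )
    void forwardDense (const std::vector<double>& prev,
                       const std::vector<double>& weights, // curr.size()*prev.size() entries
                       std::vector<double>& curr)
    {
        for (std::size_t j = 0; j < curr.size (); ++j)
        {
            double sum = 0.0;
            for (std::size_t i = 0; i < prev.size (); ++i)
                sum += weights[j * prev.size () + i] * prev[i];
            curr[j] = std::tanh (sum);
        }
    }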
double uniformDouble(double minValue, double maxValue)
Definition: NeuralNet.cxx:22
std::vector< double > m_values
stores the values of the nodes in this layer
Definition: NeuralNet.h:648
std::vector< double > m_weights
Definition: NeuralNet.h:1005
const_iterator_type deltasEnd() const
returns const iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:599
size_t m_maxConvergenceCount
Definition: NeuralNet.h:865
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:596
void addLayer(Layer &layer)
add a layer (layout)
Definition: NeuralNet.h:1101
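setInputSize, addLayer and outputSize together fix the layout of the net; a sketch of assembling a small net (the Layer constructor arguments are an assumption based on the EnumFunction and ModeOutputValues types documented in this file; note that addLayer takes a non-const reference, so named Layer objects are used):

    using namespace TMVA::DNN;

    Net net;
    net.setInputSize (4);
    Layer hidden (8, EnumFunction::TANH);
    Layer output (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID);
    net.addLayer (hidden);
    net.addLayer (output);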
This class is needed by JsMVA, and it&#39;s a helper class for tracking errors during the training in Jup...
Definition: MethodBase.h:89
Abstract ClassifierFactory template that handles arbitrary types.
size_t size() const
return the size of the layer
Definition: NeuralNet.h:629
const_iterator_type m_itInputEnd
iterator to the end of the nodes in the input node vector
Definition: NeuralNet.h:644
std::shared_ptr< std::function< double(double)> > m_activationFunction
activation function for this layer
Definition: NeuralNet.h:655
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
inverse activation function for this layer
Definition: NeuralNet.h:656
virtual void setProgressLimits(double minProgress=0, double maxProgress=100)
Definition: NeuralNet.h:797
size_t size() const
Definition: NeuralNet.h:248
size_t batchSize() const
mini-batch size
Definition: NeuralNet.h:774
virtual void endTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:812
std::vector< double > m_localWeights
local weights for reuse within a thread.
Definition: NeuralNet.h:376
iterator_type valueGradientsBegin()
returns iterator to the begin of the gradients of the node values
Definition: NeuralNet.h:601
const_iterator m_itBegin
iterator denoting the beginning of the batch
Definition: NeuralNet.h:251
std::vector< double > container_type
Definition: NeuralNet.h:1072
void clear()
clear the values and the deltas
Definition: NeuralNet.h:580
Settings for classification; used to distinguish between different function signatures.
Definition: NeuralNet.h:901
const_dropout_iterator dropOut() const
return the begin of the drop-out information
Definition: NeuralNet.h:627
void clear(std::string histoName)
for monitoring
Definition: NeuralNet.h:831
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and logging
Definition: NeuralNet.h:816
const_iterator_type deltasBegin() const
returns const iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:598
const_iterator begin() const
Definition: NeuralNet.h:245
Batch(typename std::vector< Pattern >::const_iterator itBegin, typename std::vector< Pattern >::const_iterator itEnd)
Definition: NeuralNet.h:240
DropContainer::const_iterator const_dropout_iterator
Definition: NeuralNet.h:449
double m_alpha
internal parameter (learningRate)
Definition: NeuralNet.h:372
EnumRegularization
Definition: NeuralNet.h:173
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition: NeuralNet.h:586
void setDropOut(Iterator itDrop)
set the drop-out info for this layer
Definition: NeuralNet.h:618
void addPoint(std::string histoName, double x, double y)
for monitoring
Definition: NeuralNet.h:829
size_t outputSize() const
output size of the DNN
Definition: NeuralNet.h:1106
ModeOutputValues operator|=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:194
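Together with operator&= above, this shows that ModeOutputValues is used as a bit-flag enum; a usage sketch (the enumerator names DIRECT and SIGMOID are assumptions):

    ModeOutputValues mode = ModeOutputValues::DIRECT;
    mode |= ModeOutputValues::SIGMOID;   // switch the sigmoid output on
    mode &= ModeOutputValues::SIGMOID;   // keep only the sigmoid bit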
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
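For orientation, the textbook sum-of-squares error this signature evaluates, for outputs o_i, truth t_i and pattern weight w (the factor 1/2 is the usual convention; the exact normalization in the implementation is not shown here):

    E = (w/2) \sum_i (o_i - t_i)^2 ,    \delta_i = w (o_i - t_i)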
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:410
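The standard (binary) cross entropy this refers to, for probabilities p_i, truth values t_i and pattern weight w (textbook form):

    E = -w \sum_i [ t_i \ln p_i + (1 - t_i) \ln (1 - p_i) ] ,    \delta_i = p_i - t_i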
bool exists(std::string histoName)
for monitoring
Definition: NeuralNet.h:832
LayerData holds the data of one layer.
Definition: NeuralNet.h:437
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition: NeuralNet.h:788
Net(const Net &other)
copy c'tor
Definition: NeuralNet.h:1092
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition: NeuralNet.h:1301
int repetitions() const
how many steps are taken before the batch is changed
Definition: NeuralNet.h:780
const_iterator m_itEnd
iterator denoting the end of the batch
Definition: NeuralNet.h:252
int randomInt(int maxValue)
Definition: NeuralNet.cxx:31
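gaussDouble, uniformDouble and randomInt cover the sampling needed for weight initialization and drop-out; a usage sketch (the half-open range of randomInt is an assumption):

    double w0 = TMVA::DNN::gaussDouble (0.0, 0.1);     // Gaussian, mean 0, sigma 0.1
    double u  = TMVA::DNN::uniformDouble (-1.0, 1.0);  // uniform in [minValue, maxValue]
    int    k  = TMVA::DNN::randomInt (10);             // integer in [0, maxValue)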
std::shared_ptr< std::function< double(double)> > m_activationFunction
stores the activation function
Definition: NeuralNet.h:703
std::vector< double > m_input
Definition: NeuralNet.h:1002