ROOT 6.10/09 Reference Guide
NeuralNet.icc
1 #ifndef TMVA_NEURAL_NET_I
2 #define TMVA_NEURAL_NET_I
3 
4 #ifndef TMVA_NEURAL_NET
5 #error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
6 #endif // TMVA_NEURAL_NET
7 #pragma once
8 #pragma GCC diagnostic ignored "-Wunused-variable"
9 
10 #include <tuple>
11 #include <future>
12 
13 #include "Math/Util.h"
14 
15 #include "TMVA/Pattern.h"
16 #include "TMVA/MethodBase.h"
17 
18 namespace TMVA
19 {
20  namespace DNN
21  {
22 
23 
24 
25 
26 
27 
28 
29 
30  template <typename T>
31  T uniformFromTo (T from, T to)
32  {
33  return from + (rand ()* (to - from)/RAND_MAX);
34  }
35 
36 
37 
38  template <typename Container, typename T>
39  void uniformDouble (Container& container, T maxValue)
40  {
41  for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
42  {
43 // (*it) = uniformFromTo (-1.0*maxValue, 1.0*maxValue);
44  (*it) = TMVA::DNN::uniformFromTo (-1.0*maxValue, 1.0*maxValue);
45  }
46  }
47 
48 
49  static std::shared_ptr<std::function<double(double)>> ZeroFnc = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 0; });
50 
51 
52  static std::shared_ptr<std::function<double(double)>> Sigmoid = std::make_shared<std::function<double(double)>> ([](double value){ value = std::max (-100.0, std::min (100.0,value)); return 1.0/(1.0 + std::exp (-value)); });
53  static std::shared_ptr<std::function<double(double)>> InvSigmoid = std::make_shared<std::function<double(double)>> ([](double value){ double s = (*Sigmoid.get ()) (value); return s*(1.0-s); });
54 
55  static std::shared_ptr<std::function<double(double)>> Tanh = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value); });
56  static std::shared_ptr<std::function<double(double)>> InvTanh = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 - std::pow (value, 2.0); });
57 
58  static std::shared_ptr<std::function<double(double)>> Linear = std::make_shared<std::function<double(double)>> ([](double value){ return value; });
59  static std::shared_ptr<std::function<double(double)>> InvLinear = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 1.0; });
60 
61  static std::shared_ptr<std::function<double(double)>> SymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? value-margin : value < -margin ? value+margin : 0; });
62  static std::shared_ptr<std::function<double(double)>> InvSymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? 1.0 : value < -margin ? 1.0 : 0; });
63 
64  static std::shared_ptr<std::function<double(double)>> ReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? value-margin : 0; });
65  static std::shared_ptr<std::function<double(double)>> InvReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? 1.0 : 0; });
66 
67  static std::shared_ptr<std::function<double(double)>> SoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return std::log (1.0+ std::exp (value)); });
68  static std::shared_ptr<std::function<double(double)>> InvSoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 / (1.0 + std::exp (-value)); });
69 
70  static std::shared_ptr<std::function<double(double)>> TanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value-0.3); });
71  static std::shared_ptr<std::function<double(double)>> InvTanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return 0.3 + (1.0 - std::pow (value, 2.0)); });
72 
73  static std::shared_ptr<std::function<double(double)>> SoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return value / (1.0 + fabs (value)); });
74  static std::shared_ptr<std::function<double(double)>> InvSoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return std::pow ((1.0 - fabs (value)),2.0); });
75 
76  static std::shared_ptr<std::function<double(double)>> Gauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return exp (-std::pow(value*s,2.0)); });
77  static std::shared_ptr<std::function<double(double)>> InvGauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return -2.0 * value * s*s * (*Gauss.get ()) (value); });
78 
79  static std::shared_ptr<std::function<double(double)>> GaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return 1.0 - exp (-std::pow(value*s,2.0)); });
80  static std::shared_ptr<std::function<double(double)>> InvGaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return +2.0 * value * s*s * (*GaussComplement.get ()) (value); });
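The activation functions above come in pairs: each function is accompanied by an "Inv" function that returns the derivative used by the backward pass, and (as the Tanh/InvTanh pair shows) the derivative is evaluated on the already-activated value. A minimal, self-contained sketch of that convention (illustration only, not part of the file):

#include <cassert>
#include <cmath>

int main ()
{
   double x = 0.7;
   double activated  = std::tanh (x);                   // what (*Tanh) (x) computes
   double derivative = 1.0 - std::pow (activated, 2.0); // what (*InvTanh) (activated) computes
   // d/dx tanh(x) = 1 - tanh(x)^2, so applying InvTanh to the activated value yields the derivative
   assert (std::fabs (derivative - (1.0 - std::tanh (x) * std::tanh (x))) < 1e-12);
   return 0;
}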
81 
82 
83 
84 /*! \brief apply weights, optionally using drop-out; when drop-out is not used, pass a pointer to a bool set to "true" as itDrop, so that *itDrop evaluates to "true" for every node
85  *
86  * itDrop advances in step with itSourceBegin
87  */
88 template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
89  void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
90  ItWeight itWeight,
91  ItTarget itTargetBegin, ItTarget itTargetEnd,
92  ItDrop itDrop)
93  {
94  for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
95  {
96  for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
97  {
98  if (!HasDropOut || *itDrop)
99  (*itTarget) += (*itSource) * (*itWeight);
100  ++itWeight;
101  }
102  if (HasDropOut) ++itDrop;
103  }
104  }
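As the loops above show, applyWeights accumulates each target value as a weighted sum of the source values, with the weights stored source-major (all weights leaving the first source node come first). A minimal usage sketch (illustration only), assuming "TMVA/NeuralNet.h" and <vector> are included:

// two source nodes feeding three target nodes, drop-out disabled
std::vector<double> source  = {1.0, 2.0};
std::vector<double> weights = {0.1, 0.2, 0.3,    // weights from source[0] to targets 0..2
                               0.4, 0.5, 0.6};   // weights from source[1] to targets 0..2
std::vector<double> target (3, 0.0);
bool noDrop = true;  // dummy flag, as in the forward() helper further below
TMVA::DNN::applyWeights<false> (source.begin (), source.end (),
                                weights.begin (),
                                target.begin (), target.end (),
                                &noDrop);
// target == {1.0*0.1 + 2.0*0.4, 1.0*0.2 + 2.0*0.5, 1.0*0.3 + 2.0*0.6} == {0.9, 1.2, 1.5}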
105 
106 
107 
108 
109 
110 
111 /*! \brief apply weights backwards (for backprop); when drop-out is not used, pass a pointer to a bool set to "true" as itDrop, so that *itDrop evaluates to "true" for every node
112  *
113  * itDrop advances in step with itPrev (consistent with "applyWeights", where it advances with itSource; the source nodes there are the same nodes as the prev nodes here in applyWeightsBackwards)
114  */
115 template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
116  void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
117  ItWeight itWeight,
118  ItPrev itPrevBegin, ItPrev itPrevEnd,
119  ItDrop itDrop)
120  {
121  for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
122  {
123  for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
124  {
125  if (!HasDropOut || *itDrop)
126  (*itPrev) += (*itCurr) * (*itWeight);
127  ++itWeight;
128  }
129  if (HasDropOut) ++itDrop;
130  }
131  }
132 
133 
134 
135 
136 
137 
138 
139 /*! \brief apply the activation functions
140  *
141  *
142  */
143 
144  template <typename ItValue, typename Fnc>
145  void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
146  {
147  while (itValue != itValueEnd)
148  {
149  auto& value = (*itValue);
150  value = (*fnc.get ()) (value);
151 
152  ++itValue;
153  }
154  }
155 
156 
157 /*! \brief apply the activation functions and compute the gradient
158  *
159  *
160  */
161  template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
162  void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
163  {
164  while (itValue != itValueEnd)
165  {
166  auto& value = (*itValue);
167  value = (*fnc.get ()) (value);
168  (*itGradient) = (*invFnc.get ()) (value);
169 
170  ++itValue; ++itGradient;
171  }
172  }
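A minimal usage sketch of the second overload (illustration only), assuming "TMVA/NeuralNet.h" and <vector> are included: the values are activated in place and the corresponding gradient entries are filled, exactly as forwardBatch does during training.

std::vector<double> values = {-1.0, 0.0, 2.5};
std::vector<double> gradients (values.size (), 0.0);
TMVA::DNN::applyFunctions (values.begin (), values.end (),
                           TMVA::DNN::Sigmoid, TMVA::DNN::InvSigmoid,
                           gradients.begin ());
// values[i] now holds the activated value; gradients[i] holds InvSigmoid applied to that activated value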
173 
174 
175 
176 /*! \brief update the gradients
177  *
178  *
179  */
180  template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
181  void update (ItSource itSource, ItSource itSourceEnd,
182  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
183  ItTargetGradient itTargetGradientBegin,
184  ItGradient itGradient)
185  {
186  while (itSource != itSourceEnd)
187  {
188  auto itTargetDelta = itTargetDeltaBegin;
189  auto itTargetGradient = itTargetGradientBegin;
190  while (itTargetDelta != itTargetDeltaEnd)
191  {
192  (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
193  ++itTargetDelta; ++itTargetGradient; ++itGradient;
194  }
195  ++itSource;
196  }
197  }
198 
199 
200 
201 
202 /*! \brief compute the regularization (L1, L2)
203  *
204  *
205  */
206  template <EnumRegularization Regularization>
207  inline double computeRegularization (double weight, const double& factorWeightDecay)
208  {
209  MATH_UNUSED(weight);
210  MATH_UNUSED(factorWeightDecay);
211 
212  return 0;
213  }
214 
215 // L1 regularization
216  template <>
217  inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
218  {
219  return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
220  }
221 
222 // L2 regularization
223  template <>
224  inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
225  {
226  return factorWeightDecay * weight;
227  }
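The two specializations return the derivative of the regularization penalty with respect to a single weight: sign(w) * factor for L1 and factor * w for L2. A minimal numeric sketch (illustration only), assuming "TMVA/NeuralNet.h" is included:

double w = -0.8, decay = 0.01;
double l1 = TMVA::DNN::computeRegularization<TMVA::DNN::EnumRegularization::L1> (w, decay); // copysign(0.01, -0.8) == -0.01
double l2 = TMVA::DNN::computeRegularization<TMVA::DNN::EnumRegularization::L2> (w, decay); // 0.01 * -0.8     == -0.008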
228 
229 
230 /*! \brief update the gradients, using regularization
231  *
232  *
233  */
234  template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
235  void update (ItSource itSource, ItSource itSourceEnd,
236  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
237  ItTargetGradient itTargetGradientBegin,
238  ItGradient itGradient,
239  ItWeight itWeight, double weightDecay)
240  {
241  // NOTE: the factor weightDecay has to already be scaled by 1/n, where n is the number of weights
242  while (itSource != itSourceEnd)
243  {
244  auto itTargetDelta = itTargetDeltaBegin;
245  auto itTargetGradient = itTargetGradientBegin;
246  while (itTargetDelta != itTargetDeltaEnd)
247  {
248  (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight, weightDecay);
249  ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
250  }
251  ++itSource;
252  }
253  }
254 
255 
256 
257 
258 
259 
260 #define USELOCALWEIGHTS 1
261 
262 
263 
264 /*! \brief implementation of the steepest gradient descent algorithm
265  *
266  * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle
267  */
268  template <typename Function, typename Weights, typename PassThrough>
269  double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
270  {
271  size_t numWeights = weights.size ();
272  // std::vector<double> gradients (numWeights, 0.0);
273  m_localGradients.assign (numWeights, 0.0);
274  // std::vector<double> localWeights (begin (weights), end (weights));
275  // m_localWeights.reserve (numWeights);
276  m_localWeights.assign (begin (weights), end (weights));
277 
278  double E = 1e10;
279  if (m_prevGradients.size () != numWeights)
280  {
281  m_prevGradients.clear ();
282  m_prevGradients.assign (weights.size (), 0);
283  }
284 
285  bool success = true;
286  size_t currentRepetition = 0;
287  while (success)
288  {
289  if (currentRepetition >= m_repetitions)
290  break;
291 
292  m_localGradients.assign (numWeights, 0.0);
293 
294  // --- nesterov momentum ---
295  // apply momentum before computing the new gradient
296  auto itPrevG = begin (m_prevGradients);
297  auto itPrevGEnd = end (m_prevGradients);
298  auto itLocWeight = begin (m_localWeights);
299  for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
300  {
301  (*itPrevG) *= m_beta;
302  (*itLocWeight) += (*itPrevG);
303  }
304 
305  E = fitnessFunction (passThrough, m_localWeights, m_localGradients);
306 // plotGradients (gradients);
307 // plotWeights (localWeights);
308 
309  double alpha = gaussDouble (m_alpha, m_alpha/2.0);
310 // double alpha = m_alpha;
311 
312  auto itG = begin (m_localGradients);
313  auto itGEnd = end (m_localGradients);
314  itPrevG = begin (m_prevGradients);
315  double maxGrad = 0.0;
316  for (; itG != itGEnd; ++itG, ++itPrevG)
317  {
318  double currGrad = (*itG);
319  double prevGrad = (*itPrevG);
320  currGrad *= alpha;
321 
322  //(*itPrevG) = m_beta * (prevGrad + currGrad);
323  currGrad += prevGrad;
324  (*itG) = currGrad;
325  (*itPrevG) = currGrad;
326 
327  if (std::fabs (currGrad) > maxGrad)
328  maxGrad = currGrad;
329  }
330 
331  if (maxGrad > 1)
332  {
333  m_alpha /= 2;
334  std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
335  std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
336  {
337  w /= maxGrad;
338  });
339  m_prevGradients.clear ();
340  }
341  else
342  {
343  auto itW = std::begin (weights);
344  std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
345  {
346  *itW += g;
347  ++itW;
348  });
349  }
350 
351  ++currentRepetition;
352  }
353  return E;
354  }
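Reduced to a single weight, the loop above performs a Nesterov-style momentum update: the previous step, scaled by m_beta, is applied before the new gradient is evaluated, and the scaled new gradient is added on top. A minimal 1-D sketch (illustration only; the values of alpha and beta are hypothetical, and the random learning-rate jitter and gradient clipping used above are omitted):

double w = 1.0, prevG = 0.0;
const double alpha = 0.1, beta = 0.9;
for (int rep = 0; rep < 100; ++rep)
{
   prevG *= beta;
   double wLookAhead = w + prevG;     // momentum applied to the local (look-ahead) weight
   double grad = -2.0 * wLookAhead;   // descent direction of f(w) = w^2, matching the sign convention of the gradients
   double step = alpha * grad + prevG;
   prevG = step;
   w += step;                         // weights += gradients, as in the non-clipped branch above
}
// w approaches 0, the minimum of f(w) = w^2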
355 
356 
357 
358 
359 
360 
361 
362 
363 
364 
365 
366 
367 
368 
369 
370 
371 
372 
373 
374 
375 /*! \brief sum of squares error function
376  *
377  *
378  */
379  template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
380  double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
381  {
382  double errorSum = 0.0;
383 
384  // output - truth
385  ItTruth itTruth = itTruthBegin;
386  bool hasDeltas = (itDelta != itDeltaEnd);
387  for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
388  {
389 // assert (itTruth != itTruthEnd);
390  double output = (*itOutput);
391  double error = output - (*itTruth);
392  if (hasDeltas)
393  {
394  (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
395  ++itDelta;
396  }
397  errorSum += error*error * patternWeight;
398  }
399 
400  return 0.5*errorSum;
401  }
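A minimal usage sketch (illustration only), assuming "TMVA/NeuralNet.h" and <vector> are included: for a linear output layer the deltas become (output - truth) * patternWeight and the returned error is 0.5 * sum((output - truth)^2) * patternWeight.

std::vector<double> output = {0.2, 0.8};
std::vector<double> truth  = {0.0, 1.0};
std::vector<double> deltas (output.size (), 0.0);
double error = TMVA::DNN::sumOfSquares (output.begin (), output.end (),
                                        truth.begin (), truth.end (),
                                        deltas.begin (), deltas.end (),
                                        TMVA::DNN::InvLinear, /*patternWeight*/ 1.0);
// error == 0.5 * (0.2*0.2 + 0.2*0.2) == 0.04;  deltas == {0.2, -0.2}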
402 
403 
404 
405 /*! \brief cross entropy error function
406  *
407  *
408  */
409  template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
410  double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
411  {
412  bool hasDeltas = (itDelta != itDeltaEnd);
413 
414  double errorSum = 0.0;
415  for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
416  {
417  double probability = *itProbability;
418  double truth = *itTruthBegin;
419  /* truth = truth < 0.1 ? 0.1 : truth; */
420  /* truth = truth > 0.9 ? 0.9 : truth; */
421  truth = truth < 0.5 ? 0.1 : 0.9;
422  if (hasDeltas)
423  {
424  double delta = probability - truth;
425  (*itDelta) = delta*patternWeight;
426 // (*itDelta) = (*itInvActFnc)(probability) * delta * patternWeight;
427  ++itDelta;
428  }
429  double error (0);
430  if (probability == 0) // protection against log (0)
431  {
432  if (truth >= 0.5)
433  error += 1.0;
434  }
435  else if (probability == 1)
436  {
437  if (truth < 0.5)
438  error += 1.0;
439  }
440  else
441  error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability)); // cross entropy function
442  errorSum += error * patternWeight;
443 
444  }
445  return errorSum;
446  }
447 
448 
449 
450 
451 /*! \brief softmax cross-entropy error function (for mutually exclusive classification)
452  *
453  *
454  */
455  template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
456  double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
457  {
458  double errorSum = 0.0;
459 
460  bool hasDeltas = (itDelta != itDeltaEnd);
461  // output - truth
462  ItTruth itTruth = itTruthBegin;
463  for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
464  {
465 // assert (itTruth != itTruthEnd);
466  double probability = (*itProbability);
467  double truth = (*itTruth);
468  if (hasDeltas)
469  {
470  (*itDelta) = probability - truth;
471 // (*itDelta) = (*itInvActFnc)(sm) * delta * patternWeight;
472  ++itDelta; //++itInvActFnc;
473  }
474  double error (0);
475 
476  error += truth * log (probability);
477  errorSum += error;
478  }
479 
480  return -errorSum * patternWeight;
481  }
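A minimal usage sketch (illustration only), assuming "TMVA/NeuralNet.h" and <vector> are included; the ItInvActFnc argument is unused by this function, so any of the function objects defined above can be passed.

std::vector<double> prob  = {0.7, 0.3};   // softmax output
std::vector<double> truth = {1.0, 0.0};   // one-hot truth
std::vector<double> deltas (prob.size (), 0.0);
double error = TMVA::DNN::softMaxCrossEntropy (prob.begin (), prob.end (),
                                               truth.begin (), truth.end (),
                                               deltas.begin (), deltas.end (),
                                               TMVA::DNN::InvLinear, /*patternWeight*/ 1.0);
// deltas == {0.7 - 1.0, 0.3 - 0.0} == {-0.3, 0.3};  error == -log(0.7) ≈ 0.357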
482 
483 
484 
485 
486 
487 
488 
489 
490 
491 /*! \brief compute the weight decay for regularization (L1 or L2)
492  *
493  *
494  */
495  template <typename ItWeight>
496  double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
497  {
498  if (eRegularization == EnumRegularization::L1)
499  {
500  // weight decay (regularization)
501  double w = 0;
502  size_t n = 0;
503  for (; itWeight != itWeightEnd; ++itWeight, ++n)
504  {
505  double weight = (*itWeight);
506  w += std::fabs (weight);
507  }
508  return error + 0.5 * w * factorWeightDecay / n;
509  }
510  else if (eRegularization == EnumRegularization::L2)
511  {
512  // weight decay (regularization)
513  double w = 0;
514  size_t n = 0;
515  for (; itWeight != itWeightEnd; ++itWeight, ++n)
516  {
517  double weight = (*itWeight);
518  w += weight*weight;
519  }
520  return error + 0.5 * w * factorWeightDecay / n;
521  }
522  else
523  return error;
524  }
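A minimal usage sketch of the L2 branch (illustration only), assuming "TMVA/NeuralNet.h" and <vector> are included: the penalty added to the error is 0.5 * sum(w^2) * factorWeightDecay / n, where n is the number of weights.

std::vector<double> weights = {0.5, -1.0, 2.0};
double error = 1.0;
double regularized = TMVA::DNN::weightDecay (error, weights.begin (), weights.end (),
                                             /*factorWeightDecay*/ 0.1,
                                             TMVA::DNN::EnumRegularization::L2);
// regularized == 1.0 + 0.5 * (0.25 + 1.0 + 4.0) * 0.1 / 3 == 1.0875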
525 
526 
527 
528 
529 
530 
531 
532 
533 
534 
535 
536 
537 
538 
539 /*! \brief apply the weights (and functions) in forward direction of the DNN
540  *
541  *
542  */
543  template <typename LAYERDATA>
544  void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
545  {
546  if (prevLayerData.hasDropOut ())
547  {
548  applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
549  currLayerData.weightsBegin (),
550  currLayerData.valuesBegin (), currLayerData.valuesEnd (),
551  prevLayerData.dropOut ());
552  }
553  else
554  {
555  bool dummy = true;
556  applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
557  currLayerData.weightsBegin (),
558  currLayerData.valuesBegin (), currLayerData.valuesEnd (),
559  &dummy); // dummy to turn on all nodes (no drop out)
560  }
561  }
562 
563 
564 
565 /*! \brief backward application of the weights (back-propagation of the error)
566  *
567  *
568  */
569 template <typename LAYERDATA>
570  void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
571 {
572  if (prevLayerData.hasDropOut ())
573  {
574  applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
575  currLayerData.weightsBegin (),
576  prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
577  prevLayerData.dropOut ());
578  }
579  else
580  {
581  bool dummy = true;
582  applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
583  currLayerData.weightsBegin (),
584  prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
585  &dummy); // dummy to use all nodes (no drop out)
586  }
587 }
588 
589 
590 
591 
592 
593 /*! \brief update the node values
594  *
595  *
596  */
597  template <typename LAYERDATA>
598  void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
599  {
600  // NOTE: the "factorWeightDecay" has to already be scaled by 1/n, where n is the number of weights
601  if (factorWeightDecay != 0.0) // has weight regularization
602  if (regularization == EnumRegularization::L1) // L1 regularization ( sum(|w|) )
603  {
604  update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
605  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
606  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
607  currLayerData.weightsBegin (), factorWeightDecay);
608  }
609  else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) )
610  {
611  update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
612  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
613  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
614  currLayerData.weightsBegin (), factorWeightDecay);
615  }
616  else
617  {
618  update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
619  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
620  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
621  }
622 
623  else
624  { // no weight regularization
625  update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
626  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
627  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
628  }
629  }
630 
631 
632 
633 
634 
635 
636 
637 
638 
639 
640 
641 
642 /*! \brief compute the drop-out-weight factor
643  *
644  * when drop-out is used, a fraction of the nodes is turned off in each cycle of the computation;
645  * once all nodes are turned on again (for instance, when the test samples are evaluated),
646  * the weights have to be adjusted to account for the different number of active nodes.
647  * This function computes that factor and applies it to the weights.
648  */
649  template <typename WeightsType, typename DropProbabilities>
650  void Net::dropOutWeightFactor (WeightsType& weights,
651  const DropProbabilities& drops,
652  bool inverse)
653  {
654  if (drops.empty () || weights.empty ())
655  return;
656 
657  auto itWeight = std::begin (weights);
658  auto itWeightEnd = std::end (weights);
659  auto itDrop = std::begin (drops);
660  auto itDropEnd = std::end (drops);
661  size_t numNodesPrev = inputSize ();
662  double dropFractionPrev = *itDrop;
663  ++itDrop;
664 
665  for (auto& layer : layers ())
666  {
667  if (itDrop == itDropEnd)
668  break;
669 
670  size_t _numNodes = layer.numNodes ();
671 
672  double dropFraction = *itDrop;
673  double pPrev = 1.0 - dropFractionPrev;
674  double p = 1.0 - dropFraction;
675  p *= pPrev;
676 
677  if (inverse)
678  {
679  p = 1.0/p;
680  }
681  size_t _numWeights = layer.numWeights (numNodesPrev);
682  for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
683  {
684  if (itWeight == itWeightEnd)
685  break;
686 
687  *itWeight *= p;
688  ++itWeight;
689  }
690  numNodesPrev = _numNodes;
691  dropFractionPrev = dropFraction;
692  ++itDrop;
693  }
694  }
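The factor applied to a layer's weights combines the keep probabilities of the previous and the current layer. A minimal numeric sketch (illustration only) of the scaling performed above:

double dropFractionPrev = 0.2, dropFraction = 0.1;
double p = (1.0 - dropFraction) * (1.0 - dropFractionPrev);  // == 0.72
double weight = 1.5;
double weightAllNodesOn = weight * p;            // applied before evaluating with all nodes active
double weightRestored   = weightAllNodesOn / p;  // the inverse factor (inverse == true) restores the training weight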
695 
696 
697 
698 
699 
700 
701 /*! \brief execute the training until convergence is reached
702  *
703  * \param weights the container with the weights (synapses)
704  * \param trainPattern the pattern for the training
705  * \param testPattern the pattern for the testing
706  * \param minimizer the minimizer (e.g. steepest gradient descent) to be used
707  * \param settings the settings for the training (e.g. multithreading or not, regularization etc.)
708  */
709  template <typename Minimizer>
710  double Net::train (std::vector<double>& weights,
711  std::vector<Pattern>& trainPattern,
712  const std::vector<Pattern>& testPattern,
713  Minimizer& minimizer,
714  Settings& settings)
715  {
716 // std::cout << "START TRAINING" << std::endl;
717  settings.startTrainCycle ();
718 
719  // JsMVA progress bar maximum (100%)
720  if (fIPyMaxIter) *fIPyMaxIter = 100;
721 
722  settings.pads (4);
723  settings.create ("trainErrors", 100, 0, 100, 100, 0,1);
724  settings.create ("testErrors", 100, 0, 100, 100, 0,1);
725 
726  size_t cycleCount = 0;
727  size_t testCycleCount = 0;
728  double testError = 1e20;
729  double trainError = 1e20;
730  size_t dropOutChangeCount = 0;
731 
732  DropContainer dropContainer;
733  DropContainer dropContainerTest;
734  const std::vector<double>& dropFractions = settings.dropFractions ();
735  bool isWeightsForDrop = false;
736 
737 
738  // until convergence
739  do
740  {
741  ++cycleCount;
742 
743  // if dropOut enabled
744  size_t dropIndex = 0;
745  if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
746  {
747  // fill the dropOut-container
748  dropContainer.clear ();
749  size_t _numNodes = inputSize ();
750  double dropFraction = 0.0;
751  dropFraction = dropFractions.at (dropIndex);
752  ++dropIndex;
753  fillDropContainer (dropContainer, dropFraction, _numNodes);
754  for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
755  {
756  auto& layer = *itLayer;
757  _numNodes = layer.numNodes ();
758  // how many nodes have to be dropped
759  dropFraction = 0.0;
760  if (dropFractions.size () > dropIndex)
761  dropFraction = dropFractions.at (dropIndex);
762 
763  fillDropContainer (dropContainer, dropFraction, _numNodes);
764  }
765  isWeightsForDrop = true;
766  }
767 
768  // execute training cycle
769  trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);
770 
771 
772  // ------ check if we have to execute a test ------------------
773  bool hasConverged = false;
774  if (testCycleCount % settings.testRepetitions () == 0) // we test only every "testRepetitions"-th repetition
775  {
776  if (isWeightsForDrop)
777  {
778  dropOutWeightFactor (weights, dropFractions);
779  isWeightsForDrop = false;
780  }
781 
782 
783  testError = 0;
784  //double weightSum = 0;
785  settings.startTestCycle ();
786  if (settings.useMultithreading ())
787  {
788  size_t numThreads = std::thread::hardware_concurrency ();
789  size_t patternPerThread = testPattern.size () / numThreads;
790  std::vector<Batch> batches;
791  auto itPat = testPattern.begin ();
792  // auto itPatEnd = testPattern.end ();
793  for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
794  {
795  batches.push_back (Batch (itPat, itPat + patternPerThread));
796  itPat += patternPerThread;
797  }
798  if (itPat != testPattern.end ())
799  batches.push_back (Batch (itPat, testPattern.end ()));
800 
801  std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
802  for (auto& batch : batches)
803  {
804  // -------------------- execute each of the batch ranges on a different thread -------------------------------
805  futures.push_back (
806  std::async (std::launch::async, [&]()
807  {
808  std::vector<double> localOutput;
809  pass_through_type passThrough (settings, batch, dropContainerTest);
810  double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
811  return std::make_tuple (testBatchError, localOutput);
812  })
813  );
814  }
815 
816  auto itBatch = batches.begin ();
817  for (auto& f : futures)
818  {
819  std::tuple<double,std::vector<double>> result = f.get ();
820  testError += std::get<0>(result) / batches.size ();
821  std::vector<double> output = std::get<1>(result);
822  if (output.size() == (outputSize() - 1) * itBatch->size())
823  {
824  auto output_iterator = output.begin();
825  for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
826  {
827  for (size_t output_index = 1; output_index < outputSize(); ++output_index)
828  {
829  settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
830  (*pattern_it).weight ());
831  ++output_iterator;
832  }
833  }
834  }
835  ++itBatch;
836  }
837 
838  }
839  else
840  {
841  std::vector<double> output;
842  //for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it)
843  {
844  //const Pattern& p = (*it);
845  //double weight = p.weight ();
846  //Batch batch (it, it+1);
847  Batch batch (begin (testPattern), end (testPattern));
848  output.clear ();
849  pass_through_type passThrough (settings, batch, dropContainerTest);
850  double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output);
851  if (output.size() == (outputSize() - 1) * batch.size())
852  {
853  auto output_iterator = output.begin();
854  for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
855  {
856  for (size_t output_index = 1; output_index < outputSize(); ++output_index)
857  {
858  settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
859  (*pattern_it).weight ());
860  ++output_iterator;
861  }
862  }
863  }
864  testError += testPatternError; /// batch.size ();
865  }
866  // testError /= testPattern.size ();
867  }
868  settings.endTestCycle ();
869 // testError /= weightSum;
870 
871  settings.computeResult (*this, weights);
872 
873  hasConverged = settings.hasConverged (testError);
874  if (!hasConverged && !isWeightsForDrop)
875  {
876  dropOutWeightFactor (weights, dropFractions, true); // inverse
877  isWeightsForDrop = true;
878  }
879  }
880  ++testCycleCount;
881  ++dropOutChangeCount;
882 
883 
884  static double x = -1.0;
885  x += 1.0;
886 // settings.resetPlot ("errors");
887  settings.addPoint ("trainErrors", cycleCount, trainError);
888  settings.addPoint ("testErrors", cycleCount, testError);
889  settings.plot ("trainErrors", "C", 1, kBlue);
890  settings.plot ("testErrors", "C", 1, kMagenta);
891 
892 
893  // setup error plots and progress bar variables for JsMVA
894  if (fInteractive){
895  fInteractive->AddPoint(cycleCount, trainError, testError);
896  if (*fExitFromTraining) break;
897  *fIPyCurrentIter = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
898  }
899 
900  if (hasConverged)
901  break;
902 
903  if ((int)cycleCount % 10 == 0) {
904 
905  TString convText = Form( "(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
906  trainError,
907  testError,
908  (int)cycleCount,
909  (int)settings.convergenceCount (),
910  (int)settings.maxConvergenceCount ());
911  double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
912  settings.cycle (progress, convText);
913  }
914  }
915  while (true);
916  settings.endTrainCycle (trainError);
917 
918  TString convText = Form( "(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
919  double progress = 100*(double)settings.maxConvergenceCount() /(double)settings.convergenceSteps ();
920  settings.cycle (progress, convText);
921 
922  return testError;
923  }
924 
925 
926 
927 /*! \brief execute a single training cycle
928  *
929  * uses multithreading if turned on
930  *
931  * \param minimizer the minimizer to be used (e.g. SGD)
932  * \param weights the weight container with all the synapse weights
933  * \param itPatternBegin begin of the pattern container
934  * \param itPatternEnd the end of the pattern container
935  * \param settings the settings for this training (e.g. multithreading or not, regularization, etc.)
936  * \param dropContainer the data for dropping-out nodes (regularization technique)
937  */
938  template <typename Iterator, typename Minimizer>
939  inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
940  Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)
941  {
942  double error = 0.0;
943  size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
944  size_t numBatches = numPattern/settings.batchSize ();
945  size_t numBatches_stored = numBatches;
946 
947  std::random_shuffle (itPatternBegin, itPatternEnd);
948  Iterator itPatternBatchBegin = itPatternBegin;
949  Iterator itPatternBatchEnd = itPatternBatchBegin;
950 
951  // create batches
952  std::vector<Batch> batches;
953  while (numBatches > 0)
954  {
955  std::advance (itPatternBatchEnd, settings.batchSize ());
956  batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
957  itPatternBatchBegin = itPatternBatchEnd;
958  --numBatches;
959  }
960 
961  // add the last pattern to the last batch
962  if (itPatternBatchEnd != itPatternEnd)
963  batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));
964 
965 
966  ///< turn on multithreading if requested
967  if (settings.useMultithreading ())
968  {
969  // -------------------- divide the batches into bunches for each thread --------------
970  size_t numThreads = std::thread::hardware_concurrency ();
971  size_t batchesPerThread = batches.size () / numThreads;
972  typedef std::vector<Batch>::iterator batch_iterator;
973  std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
974  batch_iterator itBatchBegin = std::begin (batches);
975  batch_iterator itBatchCurrEnd = std::begin (batches);
976  batch_iterator itBatchEnd = std::end (batches);
977  for (size_t iT = 0; iT < numThreads; ++iT)
978  {
979  if (iT == numThreads-1)
980  itBatchCurrEnd = itBatchEnd;
981  else
982  std::advance (itBatchCurrEnd, batchesPerThread);
983  batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
984  itBatchBegin = itBatchCurrEnd;
985  }
986 
987  // -------------------- loop over batches -------------------------------------------
988  std::vector<std::future<double>> futures;
989  for (auto& batchRange : batchVec)
990  {
991  // -------------------- execute each of the batch ranges on a different thread -------------------------------
992  futures.push_back (
993  std::async (std::launch::async, [&]()
994  {
995  double localError = 0.0;
996  for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
997  {
998  Batch& batch = *it;
999  pass_through_type settingsAndBatch (settings, batch, dropContainer);
1000  Minimizer minimizerClone (minimizer);
1001  localError += minimizerClone ((*this), weights, settingsAndBatch); /// call the minimizer
1002  }
1003  return localError;
1004  })
1005  );
1006  }
1007 
1008  for (auto& f : futures)
1009  error += f.get ();
1010  }
1011  else
1012  {
1013  for (auto& batch : batches)
1014  {
1015  std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
1016  error += minimizer ((*this), weights, settingsAndBatch);
1017  }
1018  }
1019 
1020  numBatches_stored = std::max (numBatches_stored, size_t(1)); /// normalize the error
1021  error /= numBatches_stored;
1022  settings.testIteration ();
1023 
1024  return error;
1025  }
1026 
1027 
1028 
1029 
1030 
1031 /*! \brief compute the neural net
1032  *
1033  * \param input the input data
1034  * \param weights the weight data
1035  */
1036  template <typename Weights>
1037  std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
1038  {
1039  std::vector<LayerData> layerData;
1040  layerData.reserve (m_layers.size ()+1);
1041  auto itWeight = begin (weights);
1042  auto itInputBegin = begin (input);
1043  auto itInputEnd = end (input);
1044  layerData.push_back (LayerData (itInputBegin, itInputEnd));
1045  size_t numNodesPrev = input.size ();
1046 
1047  // -------------------- prepare layer data with one pattern -------------------------------
1048  for (auto& layer: m_layers)
1049  {
1050  layerData.push_back (LayerData (layer.numNodes (), itWeight,
1051  layer.activationFunction (),
1052  layer.modeOutputValues ()));
1053  size_t _numWeights = layer.numWeights (numNodesPrev);
1054  itWeight += _numWeights;
1055  numNodesPrev = layer.numNodes ();
1056  }
1057 
1058 
1059  // --------- forward -------------
1060  forwardPattern (m_layers, layerData);
1061 
1062  // ------------- fetch output ------------------
1063  std::vector<double> output;
1064  fetchOutput (layerData.back (), output);
1065  return output;
1066  }
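A hedged usage sketch (illustration only): it relies on the Net/Layer API names that appear in the commented-out preTrain code at the end of this file (setInputSize, addLayer, Layer, setOutputSize, initializeWeights) plus the compute call defined above; EnumFunction::TANH is assumed to exist alongside the LINEAR mode used there, and <vector> and <iterator> are assumed to be included.

TMVA::DNN::Net net;
net.setInputSize (2);
net.addLayer (TMVA::DNN::Layer (4, TMVA::DNN::EnumFunction::TANH));
net.addLayer (TMVA::DNN::Layer (1, TMVA::DNN::EnumFunction::LINEAR, TMVA::DNN::ModeOutputValues::DIRECT));
net.setOutputSize (1);

std::vector<double> weights;
net.initializeWeights (TMVA::DNN::WeightInitializationStrategy::XAVIER, std::back_inserter (weights));

std::vector<double> input  = {0.3, -0.7};
std::vector<double> output = net.compute (input, weights);  // one forward pass through the two layers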
1067 
1068 
1069  template <typename Weights, typename PassThrough>
1070  double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
1071  {
1072  std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1073  assert (numWeights () == weights.size ());
1074  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false);
1075  return error;
1076  }
1077 
1078  template <typename Weights, typename PassThrough, typename OutContainer>
1079  double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
1080  {
1081  std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1082  assert (numWeights () == weights.size ());
1083  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
1084  return error;
1085  }
1086 
1087 
1088  template <typename Weights, typename Gradients, typename PassThrough>
1089  double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
1090  {
1091  std::vector<double> nothing;
1092  assert (numWeights () == weights.size ());
1093  assert (weights.size () == gradients.size ());
1094  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
1095  return error;
1096  }
1097 
1098  template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1099  double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
1100  {
1101  MATH_UNUSED(eFetch);
1102  assert (numWeights () == weights.size ());
1103  assert (weights.size () == gradients.size ());
1104  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
1105  return error;
1106  }
1107 
1108 
1109 
1110  template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1111  std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
1112  Batch& batch,
1113  const DropContainer& dropContainer,
1114  ItWeight itWeightBegin,
1115  ItWeight /*itWeightEnd*/,
1116  ItGradient itGradientBegin,
1117  ItGradient itGradientEnd,
1118  size_t& totalNumWeights) const
1119  {
1120  LayerData::const_dropout_iterator itDropOut; // iterator over the drop-out flags
1121  bool usesDropOut = !dropContainer.empty ();
1122  if (usesDropOut)
1123  itDropOut = std::begin (dropContainer);
1124 
1125  if (_layers.empty ())
1126  throw std::string ("no layers in this net");
1127 
1128 
1129  // ----------- create layer data -------------------------------------------------------
1130  //LM- this assert is not needed anymore (outputSize is actually numNodes+1)
1131  //assert (_layers.back ().numNodes () == outputSize ());
1132  totalNumWeights = 0;
1133  size_t totalNumNodes = 0;
1134  std::vector<std::vector<LayerData>> layerPatternData;
1135  layerPatternData.reserve (_layers.size ()+1);
1136  ItWeight itWeight = itWeightBegin;
1137  ItGradient itGradient = itGradientBegin;
1138  size_t numNodesPrev = inputSize ();
1139  typename Pattern::const_iterator itInputBegin;
1140  typename Pattern::const_iterator itInputEnd;
1141 
1142  // ItWeight itGammaBegin = itWeightBegin + numWeights ();
1143  // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes ();
1144  // ItGradient itGradGammaBegin = itGradientBegin + numWeights ();
1145  // ItGradient itGradBetaBegin = itGradientBegin + numWeights () + numNodes ();
1146 
1147 
1148  // --------------------- prepare layer data for input layer ----------------------------
1149  layerPatternData.push_back (std::vector<LayerData>());
1150  for (const Pattern& _pattern : batch)
1151  {
1152  std::vector<LayerData>& layerData = layerPatternData.back ();
1153  layerData.push_back (LayerData (numNodesPrev));
1154 
1155  itInputBegin = _pattern.beginInput ();
1156  itInputEnd = _pattern.endInput ();
1157  layerData.back ().setInput (itInputBegin, itInputEnd);
1158 
1159  if (usesDropOut)
1160  layerData.back ().setDropOut (itDropOut);
1161 
1162  }
1163 
1164 
1165  if (usesDropOut)
1166  itDropOut += _layers.back ().numNodes ();
1167 
1168  // ---------------- prepare subsequent layers ---------------------------------------------
1169  // for each of the layers
1170  for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
1171  {
1172  bool isOutputLayer = (itLayer+1 == itLayerEnd);
1173  bool isFirstHiddenLayer = (itLayer == begin (_layers));
1174 
1175  auto& layer = *itLayer;
1176  layerPatternData.push_back (std::vector<LayerData>());
1177  // for each pattern, prepare a layerData
1178  for (const Pattern& _pattern : batch)
1179  {
1180  std::vector<LayerData>& layerData = layerPatternData.back ();
1181  //layerData.push_back (LayerData (numNodesPrev));
1182 
1183  if (itGradientBegin == itGradientEnd)
1184  {
1185  layerData.push_back (LayerData (layer.numNodes (), itWeight,
1186  layer.activationFunction (),
1187  layer.modeOutputValues ()));
1188  }
1189  else
1190  {
1191  layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
1192  layer.activationFunction (),
1193  layer.inverseActivationFunction (),
1194  layer.modeOutputValues ()));
1195  }
1196 
1197  if (usesDropOut)
1198  {
1199  layerData.back ().setDropOut (itDropOut);
1200  }
1201 
1202  }
1203 
1204  if (usesDropOut)
1205  {
1206  itDropOut += layer.numNodes ();
1207  }
1208  size_t _numWeights = layer.numWeights (numNodesPrev);
1209  totalNumWeights += _numWeights;
1210  itWeight += _numWeights;
1211  itGradient += _numWeights;
1212  numNodesPrev = layer.numNodes ();
1213  totalNumNodes += numNodesPrev;
1214 
1215  }
1216  assert (totalNumWeights > 0);
1217  return layerPatternData;
1218 }
1219 
1220 
1221 
1222  template <typename LayerContainer>
1223  void Net::forwardPattern (const LayerContainer& _layers,
1224  std::vector<LayerData>& layerData) const
1225  {
1226  size_t idxLayer = 0, idxLayerEnd = _layers.size ();
1227  size_t cumulativeNodeCount = 0;
1228  for (; idxLayer < idxLayerEnd; ++idxLayer)
1229  {
1230  LayerData& prevLayerData = layerData.at (idxLayer);
1231  LayerData& currLayerData = layerData.at (idxLayer+1);
1232 
1233  forward (prevLayerData, currLayerData);
1234 
1235  applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1236  }
1237  }
1238 
1239 
1240 
1241 
1242  template <typename LayerContainer, typename LayerPatternContainer>
1243  void Net::forwardBatch (const LayerContainer& _layers,
1244  LayerPatternContainer& layerPatternData,
1245  std::vector<double>& valuesMean,
1246  std::vector<double>& valuesStdDev,
1247  size_t trainFromLayer) const
1248  {
1249  valuesMean.clear ();
1250  valuesStdDev.clear ();
1251 
1252  // ---------------------------------- loop over layers and pattern -------------------------------------------------------
1253  size_t cumulativeNodeCount = 0;
1254  for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
1255  {
1256  bool doTraining = idxLayer >= trainFromLayer;
1257 
1258  // get layer-pattern data for this and the corresponding one from the next layer
1259  std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
1260  std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);
1261 
1262  size_t numPattern = prevLayerPatternData.size ();
1263  size_t numNodesLayer = _layers.at (idxLayer).numNodes ();
1264 
1265  std::vector<MeanVariance> means (numNodesLayer);
1266  // ---------------- loop over layerDatas of pattern compute forward ----------------------------
1267  for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1268  {
1269  const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1270  LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1271 
1272 
1273  forward (prevLayerData, currLayerData); // feed forward
1274  }
1275 
1276  // ---------------- loop over layerDatas of pattern apply non-linearities ----------------------------
1277  for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1278  {
1279  //const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1280  LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1281 
1282  if (doTraining)
1283  applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
1284  currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
1285  else
1286  applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1287  }
1288 
1289  // accumulate node count
1290  cumulativeNodeCount += numNodesLayer;
1291  }
1292 }
1293 
1294 
1295 
1296 
1297  template <typename OutputContainer>
1298  void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
1299  {
1300  ModeOutputValues eModeOutput = lastLayerData.outputMode ();
1301  if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
1302  {
1303  outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
1304  }
1305  else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) ||
1306  isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput))
1307  {
1308  const auto& prob = lastLayerData.probabilities ();
1309  outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ()) ;
1310  }
1311  else
1312  assert (false);
1313  }
1314 
1315 
1316 
1317 
1318  template <typename OutputContainer>
1319  void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
1320  {
1321  for (const LayerData& lastLayerData : lastLayerPatternData)
1322  fetchOutput (lastLayerData, outputContainer);
1323  }
1324 
1325 
1326 
1327  template <typename ItWeight>
1328  std::tuple</*sumError*/double,/*sumWeights*/double> Net::computeError (const Settings& settings,
1329  std::vector<LayerData>& lastLayerData,
1330  Batch& batch,
1331  ItWeight itWeightBegin,
1332  ItWeight itWeightEnd) const
1333  {
1334  typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();
1335 // typename std::vector<LayerData>::iterator itLayerDataEnd = lastLayerData.end ();
1336 
1337  typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
1338  typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();
1339 
1340  double sumWeights (0.0);
1341  double sumError (0.0);
1342 
1343  size_t idxPattern = 0;
1344 // FIXME: check that iteration doesn't go beyond itLayerDataEnd!
1345  for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
1346  {
1347  ++idxPattern;
1348 
1349  // compute E and the deltas of the computed output and the true output
1350  LayerData& layerData = (*itLayerData);
1351  const Pattern& _pattern = (*itPattern);
1352  double error = errorFunction (layerData, _pattern.output (),
1353  itWeightBegin, itWeightEnd,
1354  _pattern.weight (), settings.factorWeightDecay (),
1355  settings.regularization ());
1356  sumWeights += fabs (_pattern.weight ());
1357  sumError += error;
1358  }
1359  return std::make_tuple (sumError, sumWeights);
1360  }
1361 
1362 
1363 
1364  template <typename Settings>
1365  void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
1366  const Settings& settings,
1367  size_t trainFromLayer,
1368  size_t totalNumWeights) const
1369  {
1370  bool doTraining = layerPatternData.size () > trainFromLayer;
1371  if (doTraining) // training
1372  {
1373  // ------------- backpropagation -------------
1374  size_t idxLayer = layerPatternData.size ();
1375  for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
1376  itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
1377  {
1378  --idxLayer;
1379  if (idxLayer <= trainFromLayer) // no training
1380  break;
1381 
1382  std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
1383  std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);
1384 
1385  size_t idxPattern = 0;
1386 // FIXME: check that itPrevLayerData doesn't go beyond itPrevLayerDataEnd!
1387  for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
1388  itPrevLayerData = begin (prevLayerDataColl) /*, itPrevLayerDataEnd = end (prevLayerDataColl)*/;
1389  itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
1390  {
1391  LayerData& currLayerData = (*itCurrLayerData);
1392  LayerData& prevLayerData = *(itPrevLayerData);
1393 
1394  backward (prevLayerData, currLayerData);
1395 
1396  // the factorWeightDecay has to be scaled by 1/n, where n is the number of weights (synapses),
1397  // because of how L1 and L2 regularization are defined
1398  //
1399  // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization
1400  //
1401  // L1 : -factorWeightDecay*sgn(w)/numWeights
1402  // L2 : -factorWeightDecay/numWeights
1403  update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
1404  }
1405  }
1406  }
1407  }
1408 
1409 
1410 
1411 /*! \brief forward propagation and backward propagation
1412  *
1413  *
1414  */
1415  template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
1416  double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
1417  ItWeight itWeightBegin, ItWeight itWeightEnd,
1418  ItGradient itGradientBegin, ItGradient itGradientEnd,
1419  size_t trainFromLayer,
1420  OutContainer& outputContainer, bool doFetchOutput) const
1421  {
1422  Settings& settings = std::get<0>(settingsAndBatch);
1423  Batch& batch = std::get<1>(settingsAndBatch);
1424  DropContainer& dropContainer = std::get<2>(settingsAndBatch);
1425 
1426  double sumError = 0.0;
1427  double sumWeights = 0.0; // -------------
1428 
1429 
1430  // ----------------------------- prepare layer data -------------------------------------
1431  size_t totalNumWeights (0);
1432  std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
1433  batch,
1434  dropContainer,
1435  itWeightBegin,
1436  itWeightEnd,
1437  itGradientBegin,
1438  itGradientEnd,
1439  totalNumWeights);
1440 
1441 
1442 
1443  // ---------------------------------- propagate forward ------------------------------------------------------------------
1444  std::vector<double> valuesMean;
1445  std::vector<double> valuesStdDev;
1446  forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);
1447 
1448 
1449  // ------------- fetch output ------------------
1450  if (doFetchOutput)
1451  {
1452  fetchOutput (layerPatternData.back (), outputContainer);
1453  }
1454 
1455 
1456  // ------------- error computation -------------
1457  std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);
1458 
1459 
1460  // ------------- backpropagation -------------
1461  backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);
1462 
1463 
1464  // --- compile the measures
1465  double batchSize = std::distance (std::begin (batch), std::end (batch));
1466  for (auto it = itGradientBegin; it != itGradientEnd; ++it)
1467  (*it) /= batchSize;
1468 
1469 
1470  sumError /= sumWeights;
1471  return sumError;
1472  }
1473 
1474 
1475 
1476 /*! \brief initialization of the weights
1477  *
1478  *
1479  */
1480  template <typename OutIterator>
1481  void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
1482  {
1483  if (eInitStrategy == WeightInitializationStrategy::XAVIER)
1484  {
1485  // input and output properties
1486  int numInput = inputSize ();
1487 
1488  // compute variance and mean of input and output
1489  //...
1490 
1491 
1492  // compute the weights
1493  for (auto& layer: layers ())
1494  {
1495  double nIn = numInput;
1496  double stdDev = sqrt (2.0/nIn);
1497  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1498  {
1499  (*itWeight) = DNN::gaussDouble (0.0, stdDev); // factor 2.0 for ReLU
1500  ++itWeight;
1501  }
1502  numInput = layer.numNodes ();
1503  }
1504  return;
1505  }
1506 
1507  if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
1508  {
1509  // input and output properties
1510  int numInput = inputSize ();
1511 
1512  // compute variance and mean of input and output
1513  //...
1514 
1515 
1516  // compute the weights
1517  for (auto& layer: layers ())
1518  {
1519  double nIn = numInput;
1520  double minVal = -sqrt(2.0/nIn);
1521  double maxVal = sqrt (2.0/nIn);
1522  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1523  {
1524 
1525  (*itWeight) = DNN::uniformDouble (minVal, maxVal); // factor 2.0 for ReLU
1526  ++itWeight;
1527  }
1528  numInput = layer.numNodes ();
1529  }
1530  return;
1531  }
1532 
1533  if (eInitStrategy == WeightInitializationStrategy::TEST)
1534  {
1535  // input and output properties
1536  int numInput = inputSize ();
1537 
1538  // compute variance and mean of input and output
1539  //...
1540 
1541 
1542  // compute the weights
1543  for (auto& layer: layers ())
1544  {
1545 // double nIn = numInput;
1546  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1547  {
1548  (*itWeight) = DNN::gaussDouble (0.0, 0.1);
1549  ++itWeight;
1550  }
1551  numInput = layer.numNodes ();
1552  }
1553  return;
1554  }
1555 
1556  if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
1557  {
1558  // input and output properties
1559  int numInput = inputSize ();
1560 
1561  // compute variance and mean of input and output
1562  //...
1563 
1564 
1565  // compute the weights
1566  for (auto& layer: layers ())
1567  {
1568  double nIn = numInput;
1569  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1570  {
1571  (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn))); // factor 2.0 for ReLU
1572  ++itWeight;
1573  }
1574  numInput = layer.numNodes ();
1575  }
1576  return;
1577  }
1578 
1579  }
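A minimal sketch (illustration only) of the rule used in the XAVIER branch above, assuming "TMVA/NeuralNet.h" and <cmath> are included: each weight of a layer with nIn incoming nodes is drawn from a Gaussian with mean 0 and standard deviation sqrt(2/nIn), the factor 2 being the ReLU-oriented variant noted in the comments.

double nIn = 100.0;
double stdDev = std::sqrt (2.0 / nIn);              // ≈ 0.141
double w = TMVA::DNN::gaussDouble (0.0, stdDev);    // one randomly drawn weight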
1580 
1581 
1582 
1583 
1584 
1585 /*! \brief compute the error function
1586  *
1587  *
1588  */
1589  template <typename Container, typename ItWeight>
1590  double Net::errorFunction (LayerData& layerData,
1591  Container truth,
1592  ItWeight itWeight,
1593  ItWeight itWeightEnd,
1594  double patternWeight,
1595  double factorWeightDecay,
1596  EnumRegularization eRegularization) const
1597  {
1598  double error (0);
1599  switch (m_eErrorFunction)
1600  {
1601  case ModeErrorFunction::SUMOFSQUARES:
1602  {
1603  error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth),
1604  layerData.deltasBegin (), layerData.deltasEnd (),
1605  layerData.inverseActivationFunction (),
1606  patternWeight);
1607  break;
1608  }
1609  case ModeErrorFunction::CROSSENTROPY:
1610  {
1611  assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
1612  std::vector<double> probabilities = layerData.probabilities ();
1613  error = crossEntropy (begin (probabilities), end (probabilities),
1614  begin (truth), end (truth),
1615  layerData.deltasBegin (), layerData.deltasEnd (),
1616  layerData.inverseActivationFunction (),
1617  patternWeight);
1618  break;
1619  }
1620  case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
1621  {
1622  std::cout << "softmax." << std::endl;
1623  assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
1624  std::vector<double> probabilities = layerData.probabilities ();
1625  error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
1626  begin (truth), end (truth),
1627  layerData.deltasBegin (), layerData.deltasEnd (),
1628  layerData.inverseActivationFunction (),
1629  patternWeight);
1630  break;
1631  }
1632  }
1633  if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
1634  {
1635  error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
1636  }
1637  return error;
1638  }
1639 
1640 
1641 
1642 
1643 
1644 
1645 
1646 // /*! \brief pre-training
1647 // *
1648 // * in development
1649 // */
1650 // template <typename Minimizer>
1651 // void Net::preTrain (std::vector<double>& weights,
1652 // std::vector<Pattern>& trainPattern,
1653 // const std::vector<Pattern>& testPattern,
1654 // Minimizer& minimizer, Settings& settings)
1655 // {
1656 // auto itWeightGeneral = std::begin (weights);
1657 // std::vector<Pattern> prePatternTrain (trainPattern.size ());
1658 // std::vector<Pattern> prePatternTest (testPattern.size ());
1659 
1660 // size_t _inputSize = inputSize ();
1661 
1662 // // transform pattern using the created preNet
1663 // auto initializePrePattern = [&](const std::vector<Pattern>& pttrnInput, std::vector<Pattern>& pttrnOutput)
1664 // {
1665 // pttrnOutput.clear ();
1666 // std::transform (std::begin (pttrnInput), std::end (pttrnInput),
1667 // std::back_inserter (pttrnOutput),
1668 // [](const Pattern& p)
1669 // {
1670 // Pattern pat (p.input (), p.input (), p.weight ());
1671 // return pat;
1672 // });
1673 // };
1674 
1675 // initializePrePattern (trainPattern, prePatternTrain);
1676 // initializePrePattern (testPattern, prePatternTest);
1677 
1678 // std::vector<double> originalDropFractions = settings.dropFractions ();
1679 
1680 // for (auto& _layer : layers ())
1681 // {
1682 // // compute number of weights (as a function of the number of incoming nodes)
1683 // // fetch number of nodes
1684 // size_t numNodes = _layer.numNodes ();
1685 // size_t _numWeights = _layer.numWeights (_inputSize);
1686 
1687 // // ------------------
1688 // DNN::Net preNet;
1689 // if (!originalDropFractions.empty ())
1690 // {
1691 // originalDropFractions.erase (originalDropFractions.begin ());
1692 // settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ());
1693 // }
1694 // std::vector<double> preWeights;
1695 
1696 // // define the preNet (pretraining-net) for this layer
1697 // // outputSize == inputSize, because this is an autoencoder;
1698 // preNet.setInputSize (_inputSize);
1699 // preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ()));
1700 // preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT));
1701 // preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES);
1702 // preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder)
1703 
1704 // // initialize weights
1705 // preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM,
1706 // std::back_inserter (preWeights));
1707 
1708 // // overwrite already existing weights from the "general" weights
1709 // std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ());
1710 // std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer
1711 
1712 
1713 // // train the "preNet"
1714 // preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings);
1715 
1716 // // fetch the pre-trained weights (without the output part of the autoencoder)
1717 // std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral);
1718 
1719 // // advance the iterator on the incoming weights
1720 // itWeightGeneral += _numWeights;
1721 
1722 // // remove the weights of the output layer of the preNet
1723 // preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ());
1724 
1725 // // remove the outputLayer of the preNet
1726 // preNet.removeLayer ();
1727 
1728 // // set the output size to the number of nodes in the new output layer (== last hidden layer)
1729 // preNet.setOutputSize (numNodes);
1730 
1731 // // transform pattern using the created preNet
1732 // auto proceedPattern = [&](std::vector<Pattern>& pttrn)
1733 // {
1734 // std::vector<Pattern> newPttrn;
1735 // std::for_each (std::begin (pttrn), std::end (pttrn),
1736 // [&preNet,&preWeights,&newPttrn](Pattern& p)
1737 // {
1738 // std::vector<double> output = preNet.compute (p.input (), preWeights);
1739 // Pattern pat (output, output, p.weight ());
1740 // newPttrn.push_back (pat);
1741 // // p = pat;
1742 // });
1743 // return newPttrn;
1744 // };
1745 
1746 
1747 // prePatternTrain = proceedPattern (prePatternTrain);
1748 // prePatternTest = proceedPattern (prePatternTest);
1749 
1750 
1751 // // the new input size is the output size of the already reduced preNet
1752 // _inputSize = preNet.layers ().back ().numNodes ();
1753 // }
1754 // }
1755 
1756 
1757 
1758 
1759 
1760 
1761 
1762 
1763 
1764 
1765 
1766 
1767 
1768 
1769 
1770 
1771  } // namespace DNN
1772 } // namespace TMVA
1773 
1774 #endif