Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
NeuralNet.icc
Go to the documentation of this file.
1#ifndef TMVA_NEURAL_NET_I
2#define TMVA_NEURAL_NET_I
3
4#ifndef TMVA_NEURAL_NET
5#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
6#endif // TMVA_NEURAL_NET
7#pragma once
8#ifndef _MSC_VER
9#pragma GCC diagnostic ignored "-Wunused-variable"
10#endif
11
12#include "Math/Util.h"
13
14#include "TMVA/Pattern.h"
15#include "TMVA/MethodBase.h"
16
17#include <tuple>
18#include <future>
19#include <random>
20
21namespace TMVA
22{
23 namespace DNN
24 {
25
26
27
28
29
30
31
32
33 template <typename T>
34 T uniformFromTo (T from, T to)
35 {
36 return from + (rand ()* (to - from)/RAND_MAX);
37 }
38
39
40
41 template <typename Container, typename T>
42 void uniformDouble (Container& container, T maxValue)
43 {
44 for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
45 {
46// (*it) = uniformFromTo (-1.0*maxValue, 1.0*maxValue);
47 (*it) = TMVA::DNN::uniformFromTo (-1.0*maxValue, 1.0*maxValue);
48 }
49 }
50
51
    // ------------------------------------------------------------------
    // activation functions, shared as std::function objects; the actual
    // definitions live outside this file (extern declarations only).
    // For each activation "F" there is a companion "InvF" which is applied
    // to the activated value when gradients are computed (see the
    // gradient-filling overload of applyFunctions below).
    // ------------------------------------------------------------------

    extern std::shared_ptr<std::function<double(double)>> ZeroFnc;  // "zero" placeholder activation (per its name; defined elsewhere)


    extern std::shared_ptr<std::function<double(double)>> Sigmoid;  // sigmoid activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvSigmoid;

    extern std::shared_ptr<std::function<double(double)>> Tanh;  // tanh activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvTanh;

    extern std::shared_ptr<std::function<double(double)>> Linear;  // linear (identity-like) activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvLinear;

    extern std::shared_ptr<std::function<double(double)>> SymmReLU;  // symmetric ReLU activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvSymmReLU;

    extern std::shared_ptr<std::function<double(double)>> ReLU;  // ReLU activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvReLU;

    extern std::shared_ptr<std::function<double(double)>> SoftPlus;  // softplus activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvSoftPlus;

    extern std::shared_ptr<std::function<double(double)>> TanhShift;  // shifted tanh activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvTanhShift;

    extern std::shared_ptr<std::function<double(double)>> SoftSign;  // softsign activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvSoftSign;

    extern std::shared_ptr<std::function<double(double)>> Gauss;  // gaussian activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvGauss;

    extern std::shared_ptr<std::function<double(double)>> GaussComplement;  // gaussian-complement activation + gradient companion
    extern std::shared_ptr<std::function<double(double)>> InvGaussComplement;
84
85
/*! \brief propagate source-node values through the weight matrix into the target nodes
 *
 * for each source node i and target node j the contribution
 * source[i] * weight[i*numTargets + j] is added to target[j];
 * when HasDropOut is set, a source node whose drop flag is false is skipped
 * (itDrop advances once per SOURCE node). For no drop-out, pass a pointer
 * to a bool that is "true" so *itDrop always reads true.
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    ItSource itSource = itSourceBegin;
    while (itSource != itSourceEnd)
    {
        ItTarget itTarget = itTargetBegin;
        while (itTarget != itTargetEnd)
        {
            if (!HasDropOut || *itDrop)
                (*itTarget) += (*itSource) * (*itWeight);
            ++itWeight;
            ++itTarget;
        }
        if (HasDropOut)
            ++itDrop;       // one drop flag per source node
        ++itSource;
    }
}
107
108
109
110
111
112
/*! \brief back-propagate the current layer's deltas through the weights into the previous layer
 *
 * for each previous-layer node i the contributions curr[j] * weight[i*numCurr + j]
 * are accumulated; when HasDropOut is set, a previous-layer node whose drop flag
 * is false is skipped (itDrop advances once per PREVIOUS-layer node, matching
 * "applyWeights" where it correlates with the source nodes). For no drop-out,
 * pass a pointer to a bool that is "true".
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    ItPrev itPrev = itPrevBegin;
    while (itPrev != itPrevEnd)
    {
        ItSource itCurr = itCurrBegin;
        while (itCurr != itCurrEnd)
        {
            if (!HasDropOut || *itDrop)
                (*itPrev) += (*itCurr) * (*itWeight);
            ++itWeight;
            ++itCurr;
        }
        if (HasDropOut)
            ++itDrop;       // one drop flag per previous-layer node
        ++itPrev;
    }
}
134
135
136
137
138
139
140
/*! \brief apply an activation function in place to a range of node values
 *
 * \param itValue    begin of the value range (overwritten with fnc(value))
 * \param itValueEnd end of the value range
 * \param fnc        shared_ptr to the activation function object
 */
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    for (; itValue != itValueEnd; ++itValue)
    {
        (*itValue) = (*fnc) (*itValue);
    }
}
157
158
/*! \brief apply an activation function in place and store the gradient term
 *
 * each value is replaced by fnc(value); the corresponding gradient slot is
 * filled with invFnc applied to the ACTIVATED value.
 *
 * \param itValue    begin of the value range (overwritten)
 * \param itValueEnd end of the value range
 * \param fnc        shared_ptr to the activation function
 * \param invFnc     shared_ptr to the gradient companion function
 * \param itGradient begin of the gradient output range
 */
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    for (; itValue != itValueEnd; ++itValue, ++itGradient)
    {
        const auto activated = (*fnc) (*itValue);
        (*itValue) = activated;
        (*itGradient) = (*invFnc) (activated);
    }
}
175
176
177
/*! \brief accumulate the weight gradients from source values, target deltas and target gradients
 *
 * the gradient range is laid out per (source, target) pair: itGradient keeps
 * advancing across source nodes; each slot receives
 *   -= delta[j] * source[i] * targetGradient[j]
 */
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    for (; itSource != itSourceEnd; ++itSource)
    {
        auto itDelta = itTargetDeltaBegin;
        auto itTgtGrad = itTargetGradientBegin;
        for (; itDelta != itTargetDeltaEnd; ++itDelta, ++itTgtGrad, ++itGradient)
        {
            (*itGradient) -= (*itDelta) * (*itSource) * (*itTgtGrad);
        }
    }
}
200
201
202
203
204/*! \brief compute the regularization (L1, L2)
205 *
206 *
207 */
208 template <EnumRegularization Regularization>
209 inline double computeRegularization (double weight, const double& factorWeightDecay)
210 {
211 MATH_UNUSED(weight);
212 MATH_UNUSED(factorWeightDecay);
213
214 return 0;
215 }
216
217// L1 regularization
218 template <>
219 inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
220 {
221 return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
222 }
223
224// L2 regularization
225 template <>
226 inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
227 {
228 return factorWeightDecay * weight;
229 }
230
231
232/*! \brief update the gradients, using regularization
233 *
234 *
235 */
236 template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
237 void update (ItSource itSource, ItSource itSourceEnd,
238 ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
239 ItTargetGradient itTargetGradientBegin,
240 ItGradient itGradient,
241 ItWeight itWeight, double weightDecay)
242 {
243 // ! the factor weightDecay has to be already scaled by 1/n where n is the number of weights
244 while (itSource != itSourceEnd)
245 {
246 auto itTargetDelta = itTargetDeltaBegin;
247 auto itTargetGradient = itTargetGradientBegin;
248 while (itTargetDelta != itTargetDeltaEnd)
249 {
250 (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight,weightDecay);
251 ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
252 }
253 ++itSource;
254 }
255 }
256
257
258
259
260
261
262#define USELOCALWEIGHTS 1
263
264
265
266/*! \brief implementation of the steepest gradient descent algorithm
267 *
268 * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle
269 */
270 template <typename Function, typename Weights, typename PassThrough>
271 double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
272 {
273 size_t numWeights = weights.size ();
274 // std::vector<double> gradients (numWeights, 0.0);
275 m_localGradients.assign (numWeights, 0.0);
276 // std::vector<double> localWeights (begin (weights), end (weights));
277 // m_localWeights.reserve (numWeights);
278 m_localWeights.assign (begin (weights), end (weights));
279
280 double E = 1e10;
281 if (m_prevGradients.size () != numWeights)
282 {
283 m_prevGradients.clear ();
284 m_prevGradients.assign (weights.size (), 0);
285 }
286
287 bool success = true;
288 size_t currentRepetition = 0;
289 while (success)
290 {
291 if (currentRepetition >= m_repetitions)
292 break;
293
294 m_localGradients.assign (numWeights, 0.0);
295
296 // --- nesterov momentum ---
297 // apply momentum before computing the new gradient
298 auto itPrevG = begin (m_prevGradients);
299 auto itPrevGEnd = end (m_prevGradients);
300 auto itLocWeight = begin (m_localWeights);
301 for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
302 {
303 (*itPrevG) *= m_beta;
304 (*itLocWeight) += (*itPrevG);
305 }
306
307 E = fitnessFunction (passThrough, m_localWeights, m_localGradients);
308// plotGradients (gradients);
309// plotWeights (localWeights);
310
311 double alpha = gaussDouble (m_alpha, m_alpha/2.0);
312// double alpha = m_alpha;
313
314 auto itG = begin (m_localGradients);
315 auto itGEnd = end (m_localGradients);
316 itPrevG = begin (m_prevGradients);
317 double maxGrad = 0.0;
318 for (; itG != itGEnd; ++itG, ++itPrevG)
319 {
320 double currGrad = (*itG);
321 double prevGrad = (*itPrevG);
322 currGrad *= alpha;
323
324 //(*itPrevG) = m_beta * (prevGrad + currGrad);
325 currGrad += prevGrad;
326 (*itG) = currGrad;
327 (*itPrevG) = currGrad;
328
329 if (std::fabs (currGrad) > maxGrad)
330 maxGrad = currGrad;
331 }
332
333 if (maxGrad > 1)
334 {
335 m_alpha /= 2;
336 std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
337 std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
338 {
339 w /= maxGrad;
340 });
341 m_prevGradients.clear ();
342 }
343 else
344 {
345 auto itW = std::begin (weights);
346 std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
347 {
348 *itW += g;
349 ++itW;
350 });
351 }
352
353 ++currentRepetition;
354 }
355 return E;
356 }
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
/*! \brief sum of squares error function
 *
 * returns 0.5 * sum_i patternWeight * (output_i - truth_i)^2; when a
 * non-empty delta range is provided, each delta slot is filled with
 * invFnc(output_i) * (output_i - truth_i) * patternWeight
 */
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
{
    double errorSum = 0.0;
    const bool hasDeltas = (itDelta != itDeltaEnd);

    ItTruth itTruth = itTruthBegin;
    for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
    {
        const double output = (*itOutput);
        const double error = output - (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = (*invFnc) (output) * error * patternWeight;
            ++itDelta;
        }
        errorSum += error * error * patternWeight;
    }

    return 0.5 * errorSum;
}
404
405
406
/*! \brief cross entropy error function
 *
 * computes the binary cross entropy -[t*log(p) + (1-t)*log(1-p)] summed over
 * the outputs and scaled by the pattern weight; the hard targets are mapped
 * to softened values 0.1/0.9 before use. If a non-empty delta range is given,
 * each delta slot is filled with (p - t) * patternWeight.
 *
 * NOTE(review): itTruthBegin is never advanced, so every output node is
 * compared against the FIRST truth value — presumably intended for a single
 * binary target; verify against the callers before relying on multi-output use.
 */
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    bool hasDeltas = (itDelta != itDeltaEnd);

    double errorSum = 0.0;
    for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
    {
        double probability = *itProbability;
        double truth = *itTruthBegin;
        /* truth = truth < 0.1 ? 0.1 : truth; */
        /* truth = truth > 0.9 ? 0.9 : truth; */
        // map the hard 0/1 target to softened values 0.1/0.9
        truth = truth < 0.5 ? 0.1 : 0.9;
        if (hasDeltas)
        {
            // delta = (p - t) * patternWeight (the inverse activation is not applied)
            double delta = probability - truth;
            (*itDelta) = delta*patternWeight;
//            (*itDelta) = (*itInvActFnc)(probability) * delta * patternWeight;
            ++itDelta;
        }
        double error (0);
        if (probability == 0) // protection against log (0)
        {
            if (truth >= 0.5)
                error += 1.0;   // maximally wrong prediction: fixed penalty instead of infinity
        }
        else if (probability == 1)
        {
            if (truth < 0.5)
                error += 1.0;   // maximally wrong prediction: fixed penalty instead of infinity
        }
        else
            error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability)); // cross entropy function
        errorSum += error * patternWeight;

    }
    return errorSum;
}
449
450
451
452
/*! \brief soft-max-cross-entropy error function (for mutually exclusive cross-entropy)
 *
 * computes  -patternWeight * sum_i truth_i * log(p_i)  over the output nodes;
 * when a non-empty delta range is given, each delta slot is filled with
 * p_i - truth_i (the combined soft-max + cross-entropy gradient)
 */
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    double errorSum = 0.0;

    bool hasDeltas = (itDelta != itDeltaEnd);
    // output - truth
    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        double probability = (*itProbability);
        double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;
            ++itDelta;
        }
        // fix: protect against log(0) — like crossEntropy above does.
        // Previously a zero probability produced -inf (or NaN via 0*log(0))
        // which poisoned the whole error sum; the delta above still carries
        // the full gradient information, so skipping the term is safe.
        if (probability > 0.0)
            errorSum += truth * std::log (probability);
    }

    return -errorSum * patternWeight;
}
484
485
486
487
488
489
490
491
492
493/*! \brief compute the weight decay for regularization (L1 or L2)
494 *
495 *
496 */
497 template <typename ItWeight>
498 double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
499 {
500 if (eRegularization == EnumRegularization::L1)
501 {
502 // weight decay (regularization)
503 double w = 0;
504 size_t n = 0;
505 for (; itWeight != itWeightEnd; ++itWeight, ++n)
506 {
507 double weight = (*itWeight);
508 w += std::fabs (weight);
509 }
510 return error + 0.5 * w * factorWeightDecay / n;
511 }
512 else if (eRegularization == EnumRegularization::L2)
513 {
514 // weight decay (regularization)
515 double w = 0;
516 size_t n = 0;
517 for (; itWeight != itWeightEnd; ++itWeight, ++n)
518 {
519 double weight = (*itWeight);
520 w += weight*weight;
521 }
522 return error + 0.5 * w * factorWeightDecay / n;
523 }
524 else
525 return error;
526 }
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541/*! \brief apply the weights (and functions) in forward direction of the DNN
542 *
543 *
544 */
545 template <typename LAYERDATA>
546 void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
547 {
548 if (prevLayerData.hasDropOut ())
549 {
550 applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
551 currLayerData.weightsBegin (),
552 currLayerData.valuesBegin (), currLayerData.valuesEnd (),
553 prevLayerData.dropOut ());
554 }
555 else
556 {
557 bool dummy = true;
558 applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
559 currLayerData.weightsBegin (),
560 currLayerData.valuesBegin (), currLayerData.valuesEnd (),
561 &dummy); // dummy to turn on all nodes (no drop out)
562 }
563 }
564
565
566
567/*! \brief backward application of the weights (back-propagation of the error)
568 *
569 *
570 */
571template <typename LAYERDATA>
572 void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
573{
574 if (prevLayerData.hasDropOut ())
575 {
576 applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
577 currLayerData.weightsBegin (),
578 prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
579 prevLayerData.dropOut ());
580 }
581 else
582 {
583 bool dummy = true;
584 applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
585 currLayerData.weightsBegin (),
586 prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
587 &dummy); // dummy to use all nodes (no drop out)
588 }
589}
590
591
592
593
594
595/*! \brief update the node values
596 *
597 *
598 */
599 template <typename LAYERDATA>
600 void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
601 {
602 // ! the "factorWeightDecay" has already to be scaled by 1/n where n is the number of weights
603 if (factorWeightDecay != 0.0) // has weight regularization
604 if (regularization == EnumRegularization::L1) // L1 regularization ( sum(|w|) )
605 {
606 update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
607 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
608 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
609 currLayerData.weightsBegin (), factorWeightDecay);
610 }
611 else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) )
612 {
613 update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
614 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
615 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
616 currLayerData.weightsBegin (), factorWeightDecay);
617 }
618 else
619 {
620 update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
621 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
622 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
623 }
624
625 else
626 { // no weight regularization
627 update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
628 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
629 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
630 }
631 }
632
633
634
635
636
637
638
639
640
641
642
643
/*! \brief compute the drop-out-weight factor
 *
 * when using drop-out a fraction of the nodes is turned off at each cycle of the computation
 * once all nodes are turned on again (for instances when the test samples are evaluated),
 * the weights have to be adjusted to account for the different number of active nodes
 * this function computes the factor and applies it to the weights
 *
 * \param weights the container with all weights of the net (rescaled in place)
 * \param drops   the drop fractions, one per layer (first entry: input layer)
 * \param inverse if true the reciprocal factor is applied (used to undo a
 *                previous rescaling; see the calls in Net::train)
 */
template <typename WeightsType, typename DropProbabilities>
void Net::dropOutWeightFactor (WeightsType& weights,
                               const DropProbabilities& drops,
                               bool inverse)
{
    // nothing to do without drop fractions or without weights
    if (drops.empty () || weights.empty ())
        return;

    auto itWeight = std::begin (weights);
    auto itWeightEnd = std::end (weights);
    auto itDrop = std::begin (drops);
    auto itDropEnd = std::end (drops);
    size_t numNodesPrev = inputSize ();
    double dropFractionPrev = *itDrop;   // first entry belongs to the input layer
    ++itDrop;

    for (auto& layer : layers ())
    {
        // stop when there are no more drop fractions than layers
        if (itDrop == itDropEnd)
            break;

        size_t _numNodes = layer.numNodes ();

        // combined probability that a weight is active: both its source
        // node (previous layer) and its target node (this layer) are kept
        double dropFraction = *itDrop;
        double pPrev = 1.0 - dropFractionPrev;
        double p = 1.0 - dropFraction;
        p *= pPrev;

        if (inverse)
        {
            p = 1.0/p;   // undo a previously applied factor
        }
        // rescale all weights feeding this layer
        size_t _numWeights = layer.numWeights (numNodesPrev);
        for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
        {
            if (itWeight == itWeightEnd)
                break;   // ran out of weights — stop rather than overrun

            *itWeight *= p;
            ++itWeight;
        }
        numNodesPrev = _numNodes;
        dropFractionPrev = dropFraction;
        ++itDrop;
    }
}
697
698
699
700
701
702
/*! \brief execute the training until convergence emerges
 *
 * \param weights the container with the weights (synapses)
 * \param trainPattern the pattern for the training
 * \param testPattern the pattern for the testing
 * \param minimizer the minimizer (e.g. steepest gradient descent) to be used
 * \param settings the settings for the training (e.g. multithreading or not, regularization etc.)
 * \return the test error of the last test evaluation
 */
template <typename Minimizer>
double Net::train (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer,
                   Settings& settings)
{
//        std::cout << "START TRAINING" << std::endl;
    settings.startTrainCycle ();

    // JsMVA progress bar maximum (100%)
    if (fIPyMaxIter) *fIPyMaxIter = 100;

    // set up the monitoring pads/graphs for the error evolution
    settings.pads (4);
    settings.create ("trainErrors", 100, 0, 100, 100, 0,1);
    settings.create ("testErrors", 100, 0, 100, 100, 0,1);

    size_t cycleCount = 0;          // number of executed training cycles
    size_t testCycleCount = 0;      // triggers a test every "testRepetitions" cycles
    double testError = 1e20;
    double trainError = 1e20;
    size_t dropOutChangeCount = 0;  // triggers a new drop-out mask every "dropRepetitions" cycles

    DropContainer dropContainer;        // drop-out mask used during training
    DropContainer dropContainerTest;    // stays empty: all nodes active during testing
    const std::vector<double>& dropFractions = settings.dropFractions ();
    bool isWeightsForDrop = false;      // true while "weights" are scaled for drop-out training


    // until convergence
    do
    {
        ++cycleCount;

        // if dropOut enabled: re-draw the drop-out mask every "dropRepetitions" cycles
        size_t dropIndex = 0;
        if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
        {
            // fill the dropOut-container
            dropContainer.clear ();
            size_t _numNodes = inputSize ();
            double dropFraction = 0.0;
            dropFraction = dropFractions.at (dropIndex);   // first entry: input layer
            ++dropIndex;
            fillDropContainer (dropContainer, dropFraction, _numNodes);
            for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
            {
                auto& layer = *itLayer;
                _numNodes = layer.numNodes ();
                // how many nodes have to be dropped
                dropFraction = 0.0;
                if (dropFractions.size () > dropIndex)
                    dropFraction = dropFractions.at (dropIndex);

                fillDropContainer (dropContainer, dropFraction, _numNodes);
            }
            isWeightsForDrop = true;
        }

        // execute training cycle
        trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);


        // ------ check if we have to execute a test ------------------
        bool hasConverged = false;
        if (testCycleCount % settings.testRepetitions () == 0) // we test only every "testRepetitions" repetition
        {
            if (isWeightsForDrop)
            {
                // rescale the weights so that all-nodes-active evaluation is consistent
                dropOutWeightFactor (weights, dropFractions);
                isWeightsForDrop = false;
            }


            testError = 0;
            //double weightSum = 0;
            settings.startTestCycle ();
            if (settings.useMultithreading ())
            {
                // NOTE(review): hardware_concurrency() may return 0, which would
                // divide by zero below and underflow "numThreads-1" — confirm
                size_t numThreads = std::thread::hardware_concurrency ();
                size_t patternPerThread = testPattern.size () / numThreads;
                std::vector<Batch> batches;
                auto itPat = testPattern.begin ();
                // auto itPatEnd = testPattern.end ();
                // split the test patterns into one batch per thread
                for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
                {
                    batches.push_back (Batch (itPat, itPat + patternPerThread));
                    itPat += patternPerThread;
                }
                if (itPat != testPattern.end ())
                    batches.push_back (Batch (itPat, testPattern.end ()));

                std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
                for (auto& batch : batches)
                {
                    // -------------------- execute each of the batch ranges on a different thread -------------------------------
                    futures.push_back (
                        std::async (std::launch::async, [&]()
                        {
                            std::vector<double> localOutput;
                            pass_through_type passThrough (settings, batch, dropContainerTest);
                            double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
                            return std::make_tuple (testBatchError, localOutput);
                        })
                        );
                }

                // collect the per-batch errors and feed the outputs to the monitoring
                auto itBatch = batches.begin ();
                for (auto& f : futures)
                {
                    std::tuple<double,std::vector<double>> result = f.get ();
                    testError += std::get<0>(result) / batches.size ();
                    std::vector<double> output = std::get<1>(result);
                    if (output.size() == (outputSize() - 1) * itBatch->size())
                    {
                        auto output_iterator = output.begin();
                        for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
                        {
                            for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                            {
                                settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                     (*pattern_it).weight ());
                                ++output_iterator;
                            }
                        }
                    }
                    ++itBatch;
                }

            }
            else
            {
                // single-threaded: evaluate all test patterns in one batch
                std::vector<double> output;
                //for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it)
                {
                    //const Pattern& p = (*it);
                    //double weight = p.weight ();
                    //Batch batch (it, it+1);
                    Batch batch (begin (testPattern), end (testPattern));
                    output.clear ();
                    pass_through_type passThrough (settings, batch, dropContainerTest);
                    double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output);
                    if (output.size() == (outputSize() - 1) * batch.size())
                    {
                        auto output_iterator = output.begin();
                        for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
                        {
                            for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                            {
                                settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                     (*pattern_it).weight ());
                                ++output_iterator;
                            }
                        }
                    }
                    testError += testPatternError; /// batch.size ();
                }
                // testError /= testPattern.size ();
            }
            settings.endTestCycle ();
//            testError /= weightSum;

            settings.computeResult (*this, weights);

            hasConverged = settings.hasConverged (testError);
            if (!hasConverged && !isWeightsForDrop)
            {
                // scale the weights back for the next drop-out training cycles
                dropOutWeightFactor (weights, dropFractions, true); // inverse
                isWeightsForDrop = true;
            }
        }
        ++testCycleCount;
        ++dropOutChangeCount;


        // NOTE(review): leftover counter — written every cycle but never read
        static double x = -1.0;
        x += 1.0;
//        settings.resetPlot ("errors");
        settings.addPoint ("trainErrors", cycleCount, trainError);
        settings.addPoint ("testErrors", cycleCount, testError);
        settings.plot ("trainErrors", "C", 1, kBlue);
        settings.plot ("testErrors", "C", 1, kMagenta);


        // setup error plots and progress bar variables for JsMVA
        if (fInteractive){
            fInteractive->AddPoint(cycleCount, trainError, testError);
            if (*fExitFromTraining) break;
            *fIPyCurrentIter = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
        }

        if (hasConverged)
            break;

        // report the progress every 10 cycles
        if ((int)cycleCount % 10 == 0) {

            TString convText = Form( "(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
                                     trainError,
                                     testError,
                                     (int)cycleCount,
                                     (int)settings.convergenceCount (),
                                     (int)settings.maxConvergenceCount ());
            double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
            settings.cycle (progress, convText);
        }
    }
    while (true);
    settings.endTrainCycle (trainError);

    // final progress report
    TString convText = Form( "(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
    double progress = 100*(double)settings.maxConvergenceCount() /(double)settings.convergenceSteps ();
    settings.cycle (progress, convText);

    return testError;
}
926
927
928
929/*! \brief execute a single training cycle
930 *
931 * uses multithreading if turned on
932 *
933 * \param minimizer the minimizer to be used (e.g. SGD)
934 * \param weights the weight container with all the synapse weights
935 * \param itPatternBegin begin of the pattern container
936 * \param itPatternEnd the end of the pattern container
937 * \param settings the settings for this training (e.g. multithreading or not, regularization, etc.)
938 * \param dropContainer the data for dropping-out nodes (regularization technique)
939 */
940 template <typename Iterator, typename Minimizer>
941 inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
942 Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)
943 {
944 double error = 0.0;
945 size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
946 size_t numBatches = numPattern/settings.batchSize ();
947 size_t numBatches_stored = numBatches;
948
949 std::shuffle(itPatternBegin, itPatternEnd, std::default_random_engine{});
950 Iterator itPatternBatchBegin = itPatternBegin;
951 Iterator itPatternBatchEnd = itPatternBatchBegin;
952
953 // create batches
954 std::vector<Batch> batches;
955 while (numBatches > 0)
956 {
957 std::advance (itPatternBatchEnd, settings.batchSize ());
958 batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
959 itPatternBatchBegin = itPatternBatchEnd;
960 --numBatches;
961 }
962
963 // add the last pattern to the last batch
964 if (itPatternBatchEnd != itPatternEnd)
965 batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));
966
967
968 ///< turn on multithreading if requested
969 if (settings.useMultithreading ())
970 {
971 // -------------------- divide the batches into bunches for each thread --------------
972 size_t numThreads = std::thread::hardware_concurrency ();
973 size_t batchesPerThread = batches.size () / numThreads;
974 typedef std::vector<Batch>::iterator batch_iterator;
975 std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
976 batch_iterator itBatchBegin = std::begin (batches);
977 batch_iterator itBatchCurrEnd = std::begin (batches);
978 batch_iterator itBatchEnd = std::end (batches);
979 for (size_t iT = 0; iT < numThreads; ++iT)
980 {
981 if (iT == numThreads-1)
982 itBatchCurrEnd = itBatchEnd;
983 else
984 std::advance (itBatchCurrEnd, batchesPerThread);
985 batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
986 itBatchBegin = itBatchCurrEnd;
987 }
988
989 // -------------------- loop over batches -------------------------------------------
990 std::vector<std::future<double>> futures;
991 for (auto& batchRange : batchVec)
992 {
993 // -------------------- execute each of the batch ranges on a different thread -------------------------------
994 futures.push_back (
995 std::async (std::launch::async, [&]()
996 {
997 double localError = 0.0;
998 for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
999 {
1000 Batch& batch = *it;
1001 pass_through_type settingsAndBatch (settings, batch, dropContainer);
1002 Minimizer minimizerClone (minimizer);
1003 localError += minimizerClone ((*this), weights, settingsAndBatch); /// call the minimizer
1004 }
1005 return localError;
1006 })
1007 );
1008 }
1009
1010 for (auto& f : futures)
1011 error += f.get ();
1012 }
1013 else
1014 {
1015 for (auto& batch : batches)
1016 {
1017 std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
1018 error += minimizer ((*this), weights, settingsAndBatch);
1019 }
1020 }
1021
1022 numBatches_stored = std::max (numBatches_stored, size_t(1)); /// normalize the error
1023 error /= numBatches_stored;
1024 settings.testIteration ();
1025
1026 return error;
1027 }
1028
1029
1030
1031
1032
1033/*! \brief compute the neural net
1034 *
1035 * \param input the input data
1036 * \param weights the weight data
1037 */
1038 template <typename Weights>
1039 std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
1040 {
1041 std::vector<LayerData> layerData;
1042 layerData.reserve (m_layers.size ()+1);
1043 auto itWeight = begin (weights);
1044 auto itInputBegin = begin (input);
1045 auto itInputEnd = end (input);
1046 layerData.push_back (LayerData (itInputBegin, itInputEnd));
1047 size_t numNodesPrev = input.size ();
1048
1049 // -------------------- prepare layer data with one pattern -------------------------------
1050 for (auto& layer: m_layers)
1051 {
1052 layerData.push_back (LayerData (layer.numNodes (), itWeight,
1053 layer.activationFunction (),
1054 layer.modeOutputValues ()));
1055 size_t _numWeights = layer.numWeights (numNodesPrev);
1056 itWeight += _numWeights;
1057 numNodesPrev = layer.numNodes ();
1058 }
1059
1060
1061 // --------- forward -------------
1062 forwardPattern (m_layers, layerData);
1063
1064 // ------------- fetch output ------------------
1065 std::vector<double> output;
1066 fetchOutput (layerData.back (), output);
1067 return output;
1068 }
1069
1070
1071 template <typename Weights, typename PassThrough>
1072 double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
1073 {
1074 std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1075 assert (numWeights () == weights.size ());
1076 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false);
1077 return error;
1078 }
1079
1080 template <typename Weights, typename PassThrough, typename OutContainer>
1081 double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
1082 {
1083 std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1084 assert (numWeights () == weights.size ());
1085 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
1086 return error;
1087 }
1088
1089
1090 template <typename Weights, typename Gradients, typename PassThrough>
1091 double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
1092 {
1093 std::vector<double> nothing;
1094 assert (numWeights () == weights.size ());
1095 assert (weights.size () == gradients.size ());
1096 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
1097 return error;
1098 }
1099
1100 template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1101 double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
1102 {
1103 MATH_UNUSED(eFetch);
1104 assert (numWeights () == weights.size ());
1105 assert (weights.size () == gradients.size ());
1106 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
1107 return error;
1108 }
1109
1110
1111
1112 template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1113 std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
1114 Batch& batch,
1115 const DropContainer& dropContainer,
1116 ItWeight itWeightBegin,
1117 ItWeight /*itWeightEnd*/,
1118 ItGradient itGradientBegin,
1119 ItGradient itGradientEnd,
1120 size_t& totalNumWeights) const
1121 {
1123 bool usesDropOut = !dropContainer.empty ();
1124 if (usesDropOut)
1125 itDropOut = std::begin (dropContainer);
1126
1127 if (_layers.empty ())
1128 throw std::string ("no layers in this net");
1129
1130
1131 // ----------- create layer data -------------------------------------------------------
1132 //LM- This assert not needed anymore (outputsize is actually numNodes+1)
1133 //assert (_layers.back ().numNodes () == outputSize ());
1134 totalNumWeights = 0;
1135 size_t totalNumNodes = 0;
1136 std::vector<std::vector<LayerData>> layerPatternData;
1137 layerPatternData.reserve (_layers.size ()+1);
1138 ItWeight itWeight = itWeightBegin;
1139 ItGradient itGradient = itGradientBegin;
1140 size_t numNodesPrev = inputSize ();
1141 typename Pattern::const_iterator itInputBegin;
1142 typename Pattern::const_iterator itInputEnd;
1143
1144 // ItWeight itGammaBegin = itWeightBegin + numWeights ();
1145 // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes ();
1146 // ItGradient itGradGammaBegin = itGradientBegin + numWeights ();
1147 // ItGradient itGradBetaBegin = itGradientBegin + numWeights () + numNodes ();
1148
1149
1150 // --------------------- prepare layer data for input layer ----------------------------
1151 layerPatternData.push_back (std::vector<LayerData>());
1152 for (const Pattern& _pattern : batch)
1153 {
1154 std::vector<LayerData>& layerData = layerPatternData.back ();
1155 layerData.push_back (LayerData (numNodesPrev));
1156
1157 itInputBegin = _pattern.beginInput ();
1158 itInputEnd = _pattern.endInput ();
1159 layerData.back ().setInput (itInputBegin, itInputEnd);
1160
1161 if (usesDropOut)
1162 layerData.back ().setDropOut (itDropOut);
1163
1164 }
1165
1166
1167 if (usesDropOut)
1168 itDropOut += _layers.back ().numNodes ();
1169
1170 // ---------------- prepare subsequent layers ---------------------------------------------
1171 // for each of the layers
1172 for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
1173 {
1174 bool isOutputLayer = (itLayer+1 == itLayerEnd);
1175 bool isFirstHiddenLayer = (itLayer == begin (_layers));
1176
1177 auto& layer = *itLayer;
1178 layerPatternData.push_back (std::vector<LayerData>());
1179 // for each pattern, prepare a layerData
1180 for (const Pattern& _pattern : batch)
1181 {
1182 std::vector<LayerData>& layerData = layerPatternData.back ();
1183 //layerData.push_back (LayerData (numNodesPrev));
1184
1185 if (itGradientBegin == itGradientEnd)
1186 {
1187 layerData.push_back (LayerData (layer.numNodes (), itWeight,
1188 layer.activationFunction (),
1189 layer.modeOutputValues ()));
1190 }
1191 else
1192 {
1193 layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
1194 layer.activationFunction (),
1195 layer.inverseActivationFunction (),
1196 layer.modeOutputValues ()));
1197 }
1198
1199 if (usesDropOut)
1200 {
1201 layerData.back ().setDropOut (itDropOut);
1202 }
1203
1204 }
1205
1206 if (usesDropOut)
1207 {
1208 itDropOut += layer.numNodes ();
1209 }
1210 size_t _numWeights = layer.numWeights (numNodesPrev);
1211 totalNumWeights += _numWeights;
1212 itWeight += _numWeights;
1213 itGradient += _numWeights;
1214 numNodesPrev = layer.numNodes ();
1215 totalNumNodes += numNodesPrev;
1216
1217 }
1218 assert (totalNumWeights > 0);
1219 return layerPatternData;
1220}
1221
1222
1223
1224 template <typename LayerContainer>
1225 void Net::forwardPattern (const LayerContainer& _layers,
1226 std::vector<LayerData>& layerData) const
1227 {
1228 size_t idxLayer = 0, idxLayerEnd = _layers.size ();
1229 size_t cumulativeNodeCount = 0;
1230 for (; idxLayer < idxLayerEnd; ++idxLayer)
1231 {
1232 LayerData& prevLayerData = layerData.at (idxLayer);
1233 LayerData& currLayerData = layerData.at (idxLayer+1);
1234
1235 forward (prevLayerData, currLayerData);
1236
1237 applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1238 }
1239 }
1240
1241
1242
1243
    /*! \brief forward propagation of a whole batch through all layers
     *
     * For each layer, first applies the weights (forward) for every pattern,
     * then the activation function; for layers with index >= trainFromLayer
     * the value gradients are additionally computed via the inverse
     * activation function.
     *
     * \param _layers the layer structure of the net
     * \param layerPatternData one vector<LayerData> per layer (input layer at index 0)
     * \param valuesMean cleared but never filled here — presumably a
     *        batch-normalization leftover; TODO confirm
     * \param valuesStdDev cleared but never filled here — same remark
     * \param trainFromLayer first layer index that needs training gradients
     */
    template <typename LayerContainer, typename LayerPatternContainer>
    void Net::forwardBatch (const LayerContainer& _layers,
                            LayerPatternContainer& layerPatternData,
                            std::vector<double>& valuesMean,
                            std::vector<double>& valuesStdDev,
                            size_t trainFromLayer) const
    {
        valuesMean.clear ();
        valuesStdDev.clear ();

        // ---------------------------------- loop over layers and pattern -------------------------------------------------------
        // NOTE(review): only accumulated, never read
        size_t cumulativeNodeCount = 0;
        for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
        {
            // layers at or above trainFromLayer also need value gradients
            bool doTraining = idxLayer >= trainFromLayer;

            // get layer-pattern data for this and the corresponding one from the next layer
            std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
            std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);

            size_t numPattern = prevLayerPatternData.size ();
            size_t numNodesLayer = _layers.at (idxLayer).numNodes ();

            // NOTE(review): constructed but never used — presumably a
            // batch-normalization leftover; TODO confirm before removing
            std::vector<MeanVariance> means (numNodesLayer);
            // ---------------- loop over layerDatas of pattern compute forward ----------------------------
            for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
            {
                const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
                LayerData& currLayerData = currLayerPatternData.at (idxPattern);


                forward (prevLayerData, currLayerData); // feed forward
            }

            // ---------------- loop over layerDatas of pattern apply non-linearities ----------------------------
            for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
            {
                //const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
                LayerData& currLayerData = currLayerPatternData.at (idxPattern);

                if (doTraining)
                    // training: additionally fill the value gradients
                    applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
                                    currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
                else
                    applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
            }

            // accumulate node count
            cumulativeNodeCount += numNodesLayer;
        }
}
1295
1296
1297
1298
1299 template <typename OutputContainer>
1300 void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
1301 {
1302 ModeOutputValues eModeOutput = lastLayerData.outputMode ();
1303 if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
1304 {
1305 outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
1306 }
1307 else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) ||
1308 isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput))
1309 {
1310 const auto& prob = lastLayerData.probabilities ();
1311 outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ()) ;
1312 }
1313 else
1314 assert (false);
1315 }
1316
1317
1318
1319
1320 template <typename OutputContainer>
1321 void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
1322 {
1323 for (const LayerData& lastLayerData : lastLayerPatternData)
1324 fetchOutput (lastLayerData, outputContainer);
1325 }
1326
1327
1328
1329 template <typename ItWeight>
1330 std::tuple</*sumError*/double,/*sumWeights*/double> Net::computeError (const Settings& settings,
1331 std::vector<LayerData>& lastLayerData,
1332 Batch& batch,
1333 ItWeight itWeightBegin,
1334 ItWeight itWeightEnd) const
1335 {
1336 typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();
1337// typename std::vector<LayerData>::iterator itLayerDataEnd = lastLayerData.end ();
1338
1339 typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
1340 typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();
1341
1342 double sumWeights (0.0);
1343 double sumError (0.0);
1344
1345 size_t idxPattern = 0;
1346// FIXME: check that iteration doesn't go beyond itLayerDataEnd!
1347 for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
1348 {
1349 ++idxPattern;
1350
1351 // compute E and the deltas of the computed output and the true output
1352 LayerData& layerData = (*itLayerData);
1353 const Pattern& _pattern = (*itPattern);
1354 double error = errorFunction (layerData, _pattern.output (),
1355 itWeightBegin, itWeightEnd,
1356 _pattern.weight (), settings.factorWeightDecay (),
1357 settings.regularization ());
1358 sumWeights += fabs (_pattern.weight ());
1359 sumError += error;
1360 }
1361 return std::make_tuple (sumError, sumWeights);
1362 }
1363
1364
1365
    /*! \brief back-propagate the deltas and update the gradients
     *
     * Walks the layers in reverse order from the output layer down to (but
     * not including) layer 'trainFromLayer'; for every pattern of the batch
     * it propagates the deltas one layer down and accumulates the weight
     * gradients (including the per-weight regularization term).
     *
     * \param layerPatternData per-layer, per-pattern data from prepareLayerData
     * \param settings provides factorWeightDecay and regularization scheme
     * \param trainFromLayer layers with index <= this value are not trained
     * \param totalNumWeights used to scale the weight-decay factor per weight
     */
    template <typename Settings>
    void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                             const Settings& settings,
                             size_t trainFromLayer,
                             size_t totalNumWeights) const
    {
        bool doTraining = layerPatternData.size () > trainFromLayer;
        if (doTraining) // training
        {
            // ------------- backpropagation -------------
            // idxLayer tracks the forward index of the layer the reverse
            // iterator currently points at
            size_t idxLayer = layerPatternData.size ();
            for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
                 itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
            {
                --idxLayer;
                if (idxLayer <= trainFromLayer) // no training
                    break;

                // current layer and the one below it (in forward direction)
                std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
                std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);

                size_t idxPattern = 0;
// FIXME: check that itPrevLayerData doesn't go beyond itPrevLayerDataEnd!
                // iterate the patterns of both layers in lockstep
                for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
                         itPrevLayerData = begin (prevLayerDataColl) /*, itPrevLayerDataEnd = end (prevLayerDataColl)*/;
                     itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
                {
                    LayerData& currLayerData = (*itCurrLayerData);
                    LayerData& prevLayerData = *(itPrevLayerData);

                    // propagate the deltas one layer down
                    backward (prevLayerData, currLayerData);

                    // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses)
                    // because L1 and L2 regularization
                    //
                    // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization
                    //
                    // L1 : -factorWeightDecay*sgn(w)/numWeights
                    // L2 : -factorWeightDecay/numWeights
                    update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
                }
            }
        }
    }
1410
1411
1412
/*! \brief forward propagation and backward propagation
 *
 * Central worker behind all Net::operator() overloads: unpacks the
 * (Settings, Batch, DropContainer) tuple, prepares the per-layer,
 * per-pattern data, runs the forward pass, optionally fetches the output
 * values, computes the error, back-propagates (when the gradient range is
 * non-empty and trainFromLayer allows it) and normalizes the gradients by
 * the batch size.
 *
 * \param trainFromLayer a large value (e.g. 10000) effectively disables
 *        back-propagation
 * \param doFetchOutput if true, the net output is appended to outputContainer
 * \return the summed error divided by the summed pattern weights
 */
    template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
    double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
                                  ItWeight itWeightBegin, ItWeight itWeightEnd,
                                  ItGradient itGradientBegin, ItGradient itGradientEnd,
                                  size_t trainFromLayer,
                                  OutContainer& outputContainer, bool doFetchOutput) const
    {
        Settings& settings = std::get<0>(settingsAndBatch);
        Batch& batch = std::get<1>(settingsAndBatch);
        DropContainer& dropContainer = std::get<2>(settingsAndBatch);

        double sumError = 0.0;
        double sumWeights = 0.0; // sum of the absolute pattern weights


        // ----------------------------- prepare layer data -------------------------------------
        size_t totalNumWeights (0);
        std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
                                                                                 batch,
                                                                                 dropContainer,
                                                                                 itWeightBegin,
                                                                                 itWeightEnd,
                                                                                 itGradientBegin,
                                                                                 itGradientEnd,
                                                                                 totalNumWeights);



        // ---------------------------------- propagate forward ------------------------------------------------------------------
        std::vector<double> valuesMean;
        std::vector<double> valuesStdDev;
        forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);


        // ------------- fetch output ------------------
        if (doFetchOutput)
        {
            fetchOutput (layerPatternData.back (), outputContainer);
        }


        // ------------- error computation -------------
        // the weight range handed on covers exactly the weights consumed by
        // the layers (totalNumWeights of them)
        std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);


        // ------------- backpropagation -------------
        backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);


        // --- compile the measures: normalize gradients by the batch size
        double batchSize = std::distance (std::begin (batch), std::end (batch));
        for (auto it = itGradientBegin; it != itGradientEnd; ++it)
            (*it) /= batchSize;


        // NOTE(review): divides by the summed pattern weights; if all pattern
        // weights of the batch are zero this is a division by zero — confirm
        // callers guarantee a non-zero weight sum
        sumError /= sumWeights;
        return sumError;
    }
1475
1476
1477
1478/*! \brief initialization of the weights
1479 *
1480 *
1481 */
1482 template <typename OutIterator>
1483 void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
1484 {
1485 if (eInitStrategy == WeightInitializationStrategy::XAVIER)
1486 {
1487 // input and output properties
1488 int numInput = inputSize ();
1489
1490 // compute variance and mean of input and output
1491 //...
1492
1493
1494 // compute the weights
1495 for (auto& layer: layers ())
1496 {
1497 double nIn = numInput;
1498 double stdDev = sqrt (2.0/nIn);
1499 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1500 {
1501 (*itWeight) = DNN::gaussDouble (0.0, stdDev); // factor 2.0 for ReLU
1502 ++itWeight;
1503 }
1504 numInput = layer.numNodes ();
1505 }
1506 return;
1507 }
1508
1510 {
1511 // input and output properties
1512 int numInput = inputSize ();
1513
1514 // compute variance and mean of input and output
1515 //...
1516
1517
1518 // compute the weights
1519 for (auto& layer: layers ())
1520 {
1521 double nIn = numInput;
1522 double minVal = -sqrt(2.0/nIn);
1523 double maxVal = sqrt (2.0/nIn);
1524 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1525 {
1526
1527 (*itWeight) = DNN::uniformDouble (minVal, maxVal); // factor 2.0 for ReLU
1528 ++itWeight;
1529 }
1530 numInput = layer.numNodes ();
1531 }
1532 return;
1533 }
1534
1535 if (eInitStrategy == WeightInitializationStrategy::TEST)
1536 {
1537 // input and output properties
1538 int numInput = inputSize ();
1539
1540 // compute variance and mean of input and output
1541 //...
1542
1543
1544 // compute the weights
1545 for (auto& layer: layers ())
1546 {
1547// double nIn = numInput;
1548 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1549 {
1550 (*itWeight) = DNN::gaussDouble (0.0, 0.1);
1551 ++itWeight;
1552 }
1553 numInput = layer.numNodes ();
1554 }
1555 return;
1556 }
1557
1558 if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
1559 {
1560 // input and output properties
1561 int numInput = inputSize ();
1562
1563 // compute variance and mean of input and output
1564 //...
1565
1566
1567 // compute the weights
1568 for (auto& layer: layers ())
1569 {
1570 double nIn = numInput;
1571 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1572 {
1573 (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn))); // factor 2.0 for ReLU
1574 ++itWeight;
1575 }
1576 numInput = layer.numNodes ();
1577 }
1578 return;
1579 }
1580
1581 }
1582
1583
1584
1585
1586
1587/*! \brief compute the error function
1588 *
1589 *
1590 */
1591 template <typename Container, typename ItWeight>
1592 double Net::errorFunction (LayerData& layerData,
1593 Container truth,
1594 ItWeight itWeight,
1595 ItWeight itWeightEnd,
1596 double patternWeight,
1597 double factorWeightDecay,
1598 EnumRegularization eRegularization) const
1599 {
1600 double error (0);
1601 switch (m_eErrorFunction)
1602 {
1604 {
1605 error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth),
1606 layerData.deltasBegin (), layerData.deltasEnd (),
1607 layerData.inverseActivationFunction (),
1608 patternWeight);
1609 break;
1610 }
1612 {
1614 std::vector<double> probabilities = layerData.probabilities ();
1615 error = crossEntropy (begin (probabilities), end (probabilities),
1616 begin (truth), end (truth),
1617 layerData.deltasBegin (), layerData.deltasEnd (),
1618 layerData.inverseActivationFunction (),
1619 patternWeight);
1620 break;
1621 }
1623 {
1624 std::cout << "softmax." << std::endl;
1626 std::vector<double> probabilities = layerData.probabilities ();
1627 error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
1628 begin (truth), end (truth),
1629 layerData.deltasBegin (), layerData.deltasEnd (),
1630 layerData.inverseActivationFunction (),
1631 patternWeight);
1632 break;
1633 }
1634 }
1635 if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
1636 {
1637 error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
1638 }
1639 return error;
1640 }
1641
1642
1643
1644
1645
1646
1647
1648// /*! \brief pre-training
1649// *
1650// * in development
1651// */
1652// template <typename Minimizer>
1653// void Net::preTrain (std::vector<double>& weights,
1654// std::vector<Pattern>& trainPattern,
1655// const std::vector<Pattern>& testPattern,
1656// Minimizer& minimizer, Settings& settings)
1657// {
1658// auto itWeightGeneral = std::begin (weights);
1659// std::vector<Pattern> prePatternTrain (trainPattern.size ());
1660// std::vector<Pattern> prePatternTest (testPattern.size ());
1661
1662// size_t _inputSize = inputSize ();
1663
1664// // transform pattern using the created preNet
1665// auto initializePrePattern = [&](const std::vector<Pattern>& pttrnInput, std::vector<Pattern>& pttrnOutput)
1666// {
1667// pttrnOutput.clear ();
1668// std::transform (std::begin (pttrnInput), std::end (pttrnInput),
1669// std::back_inserter (pttrnOutput),
1670// [](const Pattern& p)
1671// {
1672// Pattern pat (p.input (), p.input (), p.weight ());
1673// return pat;
1674// });
1675// };
1676
1677// initializePrePattern (trainPattern, prePatternTrain);
1678// initializePrePattern (testPattern, prePatternTest);
1679
1680// std::vector<double> originalDropFractions = settings.dropFractions ();
1681
1682// for (auto& _layer : layers ())
1683// {
1684// // compute number of weights (as a function of the number of incoming nodes)
1685// // fetch number of nodes
1686// size_t numNodes = _layer.numNodes ();
1687// size_t _numWeights = _layer.numWeights (_inputSize);
1688
1689// // ------------------
1690// DNN::Net preNet;
1691// if (!originalDropFractions.empty ())
1692// {
1693// originalDropFractions.erase (originalDropFractions.begin ());
1694// settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ());
1695// }
1696// std::vector<double> preWeights;
1697
1698// // define the preNet (pretraining-net) for this layer
1699// // outputSize == inputSize, because this is an autoencoder;
1700// preNet.setInputSize (_inputSize);
1701// preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ()));
1702// preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT));
1703// preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES);
1704// preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder)
1705
1706// // initialize weights
1707// preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM,
1708// std::back_inserter (preWeights));
1709
1710// // overwrite already existing weights from the "general" weights
1711// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ());
1712// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer
1713
1714
1715// // train the "preNet"
1716// preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings);
1717
1718// // fetch the pre-trained weights (without the output part of the autoencoder)
1719// std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral);
1720
1721// // advance the iterator on the incoming weights
1722// itWeightGeneral += _numWeights;
1723
1724// // remove the weights of the output layer of the preNet
1725// preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ());
1726
1727// // remove the outputLayer of the preNet
1728// preNet.removeLayer ();
1729
1730// // set the output size to the number of nodes in the new output layer (== last hidden layer)
1731// preNet.setOutputSize (numNodes);
1732
1733// // transform pattern using the created preNet
1734// auto proceedPattern = [&](std::vector<Pattern>& pttrn)
1735// {
1736// std::vector<Pattern> newPttrn;
1737// std::for_each (std::begin (pttrn), std::end (pttrn),
1738// [&preNet,&preWeights,&newPttrn](Pattern& p)
1739// {
1740// std::vector<double> output = preNet.compute (p.input (), preWeights);
1741// Pattern pat (output, output, p.weight ());
1742// newPttrn.push_back (pat);
1743// // p = pat;
1744// });
1745// return newPttrn;
1746// };
1747
1748
1749// prePatternTrain = proceedPattern (prePatternTrain);
1750// prePatternTest = proceedPattern (prePatternTest);
1751
1752
1753// // the new input size is the output size of the already reduced preNet
1754// _inputSize = preNet.layers ().back ().numNodes ();
1755// }
1756// }
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773 } // namespace DNN
1774} // namespace TMVA
1775
1776#endif
double
#define f(i)
Definition RSha256.hxx:104
#define g(i)
Definition RSha256.hxx:105
@ kMagenta
Definition Rtypes.h:66
@ kBlue
Definition Rtypes.h:66
double sqrt(double)
double log(double)
char * Form(const char *fmt,...)
#define MATH_UNUSED(var)
Definition Util.h:34
Double_t(* Function)(Double_t)
Definition Functor.C:4
double weight() const
Definition Pattern.h:74
std::vector< double >::const_iterator const_iterator
Definition Pattern.h:12
std::vector< double > & output()
Definition Pattern.h:84
The Batch class encapsulates one mini-batch.
Definition NeuralNet.h:235
const_iterator begin() const
Definition NeuralNet.h:244
const_iterator end() const
Definition NeuralNet.h:245
size_t size() const
Definition NeuralNet.h:247
LayerData holds the data of one layer.
Definition NeuralNet.h:437
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition NeuralNet.h:583
iterator_type valueGradientsBegin()
returns iterator to the begin of the gradients of the node values
Definition NeuralNet.h:597
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition NeuralNet.h:591
ModeOutputValues outputMode() const
returns the output mode
Definition NeuralNet.h:588
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition NeuralNet.h:608
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition NeuralNet.h:592
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition NeuralNet.h:607
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition NeuralNet.h:589
DropContainer::const_iterator const_dropout_iterator
Definition NeuralNet.h:448
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition NeuralNet.h:582
void forwardBatch(const LayerContainer &_layers, LayerPatternContainer &layerPatternData, std::vector< double > &valuesMean, std::vector< double > &valuesStdDev, size_t trainFromLayer) const
bool * fExitFromTraining
Definition NeuralNet.h:1277
std::vector< Layer > m_layers
layer-structure-data
Definition NeuralNet.h:1272
UInt_t * fIPyMaxIter
Definition NeuralNet.h:1278
std::vector< double > compute(const std::vector< double > &input, const Weights &weights) const
compute the net with the given input and the given weights
void fetchOutput(const LayerData &lastLayerData, OutputContainer &outputContainer) const
size_t inputSize() const
input size of the DNN
Definition NeuralNet.h:1098
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition NeuralNet.h:1269
double train(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
start the training
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition NeuralNet.h:1245
std::vector< std::vector< LayerData > > prepareLayerData(LayerContainer &layers, Batch &batch, const DropContainer &dropContainer, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t &totalNumWeights) const
void initializeWeights(WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
initialize the weights with the given strategy
size_t outputSize() const
output size of the DNN
Definition NeuralNet.h:1099
double errorFunction(LayerData &layerData, Container truth, ItWeight itWeight, ItWeight itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const
computes the error of the DNN
double forward_backward(LayerContainer &layers, PassThrough &settingsAndBatch, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t trainFromLayer, OutContainer &outputContainer, bool fetchOutput) const
main NN computation function
double trainCycle(Minimizer &minimizer, std::vector< double > &weights, Iterator itPatternBegin, Iterator itPatternEnd, Settings &settings, DropContainer &dropContainer)
executes one training cycle
UInt_t * fIPyCurrentIter
Definition NeuralNet.h:1278
double operator()(PassThrough &settingsAndBatch, const Weights &weights) const
execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradient...
void dropOutWeightFactor(WeightsType &weights, const DropProbabilities &drops, bool inverse=false)
set the drop out configuration
void fillDropContainer(DropContainer &dropContainer, double dropFraction, size_t numNodes) const
prepare the drop-out-container (select the nodes which are to be dropped out)
size_t numWeights(size_t trainingStartLayer=0) const
returns the number of weights in this net
IPythonInteractive * fInteractive
Definition NeuralNet.h:1276
std::tuple< double, double > computeError(const Settings &settings, std::vector< LayerData > &lastLayerData, Batch &batch, ItWeight itWeightBegin, ItWeight itWeightEnd) const
void forwardPattern(const LayerContainer &_layers, std::vector< LayerData > &layerData) const
void backPropagate(std::vector< std::vector< LayerData > > &layerPatternData, const Settings &settings, size_t trainFromLayer, size_t totalNumWeights) const
Settings for the training of the neural net.
Definition NeuralNet.h:730
bool useMultithreading() const
is multithreading turned on?
Definition NeuralNet.h:815
EnumRegularization regularization() const
some regularization of the DNN is turned on?
Definition NeuralNet.h:813
size_t convergenceCount() const
returns the current convergence count
Definition NeuralNet.h:827
size_t testRepetitions() const
how often is the test data tested
Definition NeuralNet.h:768
virtual void endTestCycle()
callback for monitoring and loggging
Definition NeuralNet.h:805
virtual void testIteration()
callback for monitoring and loggging
Definition NeuralNet.h:806
virtual bool hasConverged(double testError)
has this training converged already?
virtual void cycle(double progress, TString text)
Definition NeuralNet.h:799
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition NeuralNet.h:788
const std::vector< double > & dropFractions() const
Definition NeuralNet.h:762
void addPoint(std::string histoName, double x)
for monitoring
Definition NeuralNet.h:821
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition NeuralNet.h:781
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition NeuralNet.h:823
virtual void startTrainCycle()
Definition NeuralNet.h:782
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition NeuralNet.h:766
double factorWeightDecay() const
get the weight-decay factor
Definition NeuralNet.h:769
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition NeuralNet.h:828
void pads(int numPads)
preparation for monitoring
Definition NeuralNet.h:818
size_t batchSize() const
mini-batch size
Definition NeuralNet.h:767
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and loggging
Definition NeuralNet.h:809
size_t dropRepetitions() const
Definition NeuralNet.h:761
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition NeuralNet.h:819
virtual void startTestCycle()
callback for monitoring and loggging
Definition NeuralNet.h:804
double m_beta
internal parameter (momentum)
Definition NeuralNet.h:372
std::vector< double > m_localGradients
local gradients for reuse in thread.
Definition NeuralNet.h:376
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition NeuralNet.h:373
double m_alpha
internal parameter (learningRate)
Definition NeuralNet.h:371
std::vector< double > m_localWeights
local weights for reuse in thread.
Definition NeuralNet.h:375
double operator()(Function &fitnessFunction, Weights &weights, PassThrough &passThrough)
operator to call the steepest gradient descent algorithm
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in the two-TGraph case; it adds new data points to both graphs.
Basic string class.
Definition TString.h:136
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
std::shared_ptr< std::function< double(double)> > InvGauss
Definition NeuralNet.cxx:14
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
double uniformDouble(double minValue, double maxValue)
Definition NeuralNet.cxx:43
std::shared_ptr< std::function< double(double)> > SymmReLU
Definition NeuralNet.cxx:30
std::shared_ptr< std::function< double(double)> > TanhShift
Definition NeuralNet.cxx:31
std::shared_ptr< std::function< double(double)> > Tanh
Definition NeuralNet.cxx:29
std::shared_ptr< std::function< double(double)> > InvSigmoid
Definition NeuralNet.cxx:18
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
T uniformFromTo(T from, T to)
Definition NeuralNet.icc:34
double computeRegularization< EnumRegularization::L1 >(double weight, const double &factorWeightDecay)
std::shared_ptr< std::function< double(double)> > SoftPlus
Definition NeuralNet.cxx:27
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
std::shared_ptr< std::function< double(double)> > ZeroFnc
Definition NeuralNet.cxx:28
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
std::shared_ptr< std::function< double(double)> > InvSoftSign
Definition NeuralNet.cxx:20
std::shared_ptr< std::function< double(double)> > InvGaussComplement
Definition NeuralNet.cxx:15
double computeRegularization< EnumRegularization::L2 >(double weight, const double &factorWeightDecay)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition Functions.h:238
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max cross-entropy error function (for mutually exclusive classes)
std::shared_ptr< std::function< double(double)> > InvTanh
Definition NeuralNet.cxx:22
std::shared_ptr< std::function< double(double)> > Linear
Definition NeuralNet.cxx:24
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition NeuralNet.h:1050
std::shared_ptr< std::function< double(double)> > InvReLU
Definition NeuralNet.cxx:17
std::shared_ptr< std::function< double(double)> > GaussComplement
Definition NeuralNet.cxx:13
std::shared_ptr< std::function< double(double)> > Gauss
Definition NeuralNet.cxx:12
std::shared_ptr< std::function< double(double)> > Sigmoid
Definition NeuralNet.cxx:26
double gaussDouble(double mean, double sigma)
Definition NeuralNet.cxx:35
std::shared_ptr< std::function< double(double)> > SoftSign
Definition NeuralNet.cxx:32
std::shared_ptr< std::function< double(double)> > InvSoftPlus
Definition NeuralNet.cxx:19
std::shared_ptr< std::function< double(double)> > ReLU
Definition NeuralNet.cxx:25
double computeRegularization(double weight, const double &factorWeightDecay)
compute the regularization (L1, L2)
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition NeuralNet.h:1294
bool isFlagSet(T flag, T value)
Definition NeuralNet.h:212
std::shared_ptr< std::function< double(double)> > InvTanhShift
Definition NeuralNet.cxx:23
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
std::vector< char > DropContainer
Definition NeuralNet.h:227
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)
std::shared_ptr< std::function< double(double)> > InvSymmReLU
Definition NeuralNet.cxx:21
std::shared_ptr< std::function< double(double)> > InvLinear
Definition NeuralNet.cxx:16
create variable transformations
static void output(int code)
Definition gifencode.c:226