NeuralNet.icc
1#ifndef TMVA_NEURAL_NET_I
2#define TMVA_NEURAL_NET_I
3
4#ifndef TMVA_NEURAL_NET
5#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
6#endif // TMVA_NEURAL_NET
7#pragma once
8#pragma GCC diagnostic ignored "-Wunused-variable"
9
10#include "Math/Util.h"
11
12#include "TMVA/Pattern.h"
13#include "TMVA/MethodBase.h"
14
15#include <tuple>
16#include <future>
17#include <random>
18
19namespace TMVA
20{
21 namespace DNN
22 {
23
24
25
26
27
28
29
30
31 template <typename T>
32 T uniformFromTo (T from, T to)
33 {
34 return from + (rand ()* (to - from)/RAND_MAX);
35 }
36
37
38
39 template <typename Container, typename T>
40 void uniformDouble (Container& container, T maxValue)
41 {
42 for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
43 {
44// (*it) = uniformFromTo (-1.0*maxValue, 1.0*maxValue);
45 (*it) = TMVA::DNN::uniformFromTo (-1.0*maxValue, 1.0*maxValue);
46 }
47 }
48
49
50 extern std::shared_ptr<std::function<double(double)>> ZeroFnc;
51
52
53 extern std::shared_ptr<std::function<double(double)>> Sigmoid;
54 extern std::shared_ptr<std::function<double(double)>> InvSigmoid;
55
56 extern std::shared_ptr<std::function<double(double)>> Tanh;
57 extern std::shared_ptr<std::function<double(double)>> InvTanh;
58
59 extern std::shared_ptr<std::function<double(double)>> Linear;
60 extern std::shared_ptr<std::function<double(double)>> InvLinear;
61
62 extern std::shared_ptr<std::function<double(double)>> SymmReLU;
63 extern std::shared_ptr<std::function<double(double)>> InvSymmReLU;
64
65 extern std::shared_ptr<std::function<double(double)>> ReLU;
66 extern std::shared_ptr<std::function<double(double)>> InvReLU;
67
68 extern std::shared_ptr<std::function<double(double)>> SoftPlus;
69 extern std::shared_ptr<std::function<double(double)>> InvSoftPlus;
70
71 extern std::shared_ptr<std::function<double(double)>> TanhShift;
72 extern std::shared_ptr<std::function<double(double)>> InvTanhShift;
73
74 extern std::shared_ptr<std::function<double(double)>> SoftSign;
75 extern std::shared_ptr<std::function<double(double)>> InvSoftSign;
76
77 extern std::shared_ptr<std::function<double(double)>> Gauss;
78 extern std::shared_ptr<std::function<double(double)>> InvGauss;
79
80 extern std::shared_ptr<std::function<double(double)>> GaussComplement;
81 extern std::shared_ptr<std::function<double(double)>> InvGaussComplement;
82
83
84/*! \brief apply the weights, optionally respecting drop-out; for no drop-out, pass the address of a bool set to true so that *itDrop always evaluates to "true"
85 *
86 * itDrop advances in step with itSourceBegin (one drop-out flag per source node)
87 */
88template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
89 void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
90 ItWeight itWeight,
91 ItTarget itTargetBegin, ItTarget itTargetEnd,
92 ItDrop itDrop)
93 {
94 for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
95 {
96 for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
97 {
98 if (!HasDropOut || *itDrop)
99 (*itTarget) += (*itSource) * (*itWeight);
100 ++itWeight;
101 }
102 if (HasDropOut) ++itDrop;
103 }
104 }
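// --- Illustrative usage sketch (not part of the original file) -------------
// Shows how applyWeights accumulates target += weight * source for a tiny
// 2-source / 3-target example without drop-out. The weight container is laid
// out with the target index running fastest for each source node, exactly as
// the loop above implies. The function and variable names are hypothetical.
inline void exampleApplyWeights ()
{
    std::vector<double> sources {1.0, 2.0};               // 2 source nodes
    std::vector<double> weights {0.1, 0.2, 0.3,           // source 0 -> targets 0..2
                                 0.4, 0.5, 0.6};          // source 1 -> targets 0..2
    std::vector<double> targets (3, 0.0);                 // 3 target nodes, accumulated in place

    bool noDrop = true;                                   // dummy flag: all nodes active
    applyWeights<false> (begin (sources), end (sources),
                         begin (weights),
                         begin (targets), end (targets),
                         &noDrop);
    // targets is now {1*0.1 + 2*0.4, 1*0.2 + 2*0.5, 1*0.3 + 2*0.6} = {0.9, 1.2, 1.5}
}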
105
106
107
108
109
110
111/*! \brief apply the weights backwards (for backpropagation); for no drop-out, pass the address of a bool set to true so that *itDrop always evaluates to "true"
112 *
113 * itDrop advances in step with itPrev, in agreement with "applyWeights", where it advances with itSource (the same nodes that are the targets here in applyWeightsBackwards)
114 */
115template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
116 void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
117 ItWeight itWeight,
118 ItPrev itPrevBegin, ItPrev itPrevEnd,
119 ItDrop itDrop)
120 {
121 for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
122 {
123 for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
124 {
125 if (!HasDropOut || *itDrop)
126 (*itPrev) += (*itCurr) * (*itWeight);
127 ++itWeight;
128 }
129 if (HasDropOut) ++itDrop;
130 }
131 }
132
133
134
135
136
137
138
139/*! \brief apply the activation functions
140 *
141 *
142 */
143
144 template <typename ItValue, typename Fnc>
145 void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
146 {
147 while (itValue != itValueEnd)
148 {
149 auto& value = (*itValue);
150 value = (*fnc.get ()) (value);
151
152 ++itValue;
153 }
154 }
155
156
157/*! \brief apply the activation functions and compute the gradient
158 *
159 *
160 */
161 template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
162 void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
163 {
164 while (itValue != itValueEnd)
165 {
166 auto& value = (*itValue);
167 value = (*fnc.get ()) (value);
168 (*itGradient) = (*invFnc.get ()) (value);
169
170 ++itValue; ++itGradient;
171 }
172 }
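// --- Illustrative usage sketch (not part of the original file) -------------
// Applies the shared-pointer-wrapped activation functions declared above, once
// without and once with gradient computation. Sigmoid and InvSigmoid are the
// function objects declared further up in this file; the function name below
// is hypothetical.
inline void exampleApplyFunctions ()
{
    std::vector<double> values    {-1.0, 0.0, 1.0};
    std::vector<double> gradients ( 3,   0.0);

    // forward only: each value becomes sigmoid(value)
    applyFunctions (begin (values), end (values), Sigmoid);

    // forward plus gradient: gradients receive InvSigmoid of the activated values
    std::vector<double> values2 {-1.0, 0.0, 1.0};
    applyFunctions (begin (values2), end (values2), Sigmoid, InvSigmoid, begin (gradients));
}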
173
174
175
176/*! \brief update the gradients
177 *
178 *
179 */
180 template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
181 void update (ItSource itSource, ItSource itSourceEnd,
182 ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
183 ItTargetGradient itTargetGradientBegin,
184 ItGradient itGradient)
185 {
186 while (itSource != itSourceEnd)
187 {
188 auto itTargetDelta = itTargetDeltaBegin;
189 auto itTargetGradient = itTargetGradientBegin;
190 while (itTargetDelta != itTargetDeltaEnd)
191 {
192 (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
193 ++itTargetDelta; ++itTargetGradient; ++itGradient;
194 }
195 ++itSource;
196 }
197 }
198
199
200
201
202/*! \brief compute the regularization (L1, L2)
203 *
204 *
205 */
206 template <EnumRegularization Regularization>
207 inline double computeRegularization (double weight, const double& factorWeightDecay)
208 {
209 MATH_UNUSED(weight);
210 MATH_UNUSED(factorWeightDecay);
211
212 return 0;
213 }
214
215// L1 regularization
216 template <>
217 inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
218 {
219 return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
220 }
221
222// L2 regularization
223 template <>
224 inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
225 {
226 return factorWeightDecay * weight;
227 }
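// --- Illustrative sketch (not part of the original file) -------------------
// Tiny numeric check of the regularization terms defined above: for a weight
// w = -0.4 and decay factor 0.01, L1 contributes copysign(factor, w) = -0.01,
// L2 contributes factor*w = -0.004, and no regularization contributes 0.
// The function name is hypothetical.
inline void exampleComputeRegularization ()
{
    const double w = -0.4, factor = 0.01;
    double r0 = computeRegularization<EnumRegularization::NONE> (w, factor); //  0.0
    double r1 = computeRegularization<EnumRegularization::L1>   (w, factor); // -0.01
    double r2 = computeRegularization<EnumRegularization::L2>   (w, factor); // -0.004
    (void) r0; (void) r1; (void) r2;
}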
228
229
230/*! \brief update the gradients, using regularization
231 *
232 *
233 */
234 template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
235 void update (ItSource itSource, ItSource itSourceEnd,
236 ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
237 ItTargetGradient itTargetGradientBegin,
238 ItGradient itGradient,
239 ItWeight itWeight, double weightDecay)
240 {
241 // ! the factor weightDecay has to already be scaled by 1/n, where n is the number of weights
242 while (itSource != itSourceEnd)
243 {
244 auto itTargetDelta = itTargetDeltaBegin;
245 auto itTargetGradient = itTargetGradientBegin;
246 while (itTargetDelta != itTargetDeltaEnd)
247 {
248 (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight,weightDecay);
249 ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
250 }
251 ++itSource;
252 }
253 }
254
255
256
257
258
259
260#define USELOCALWEIGHTS 1
261
262
263
264/*! \brief implementation of the steepest gradient descent algorithm
265 *
266 * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle
267 */
268 template <typename Function, typename Weights, typename PassThrough>
269 double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
270 {
271 size_t numWeights = weights.size ();
272 // std::vector<double> gradients (numWeights, 0.0);
273 m_localGradients.assign (numWeights, 0.0);
274 // std::vector<double> localWeights (begin (weights), end (weights));
275 // m_localWeights.reserve (numWeights);
276 m_localWeights.assign (begin (weights), end (weights));
277
278 double E = 1e10;
279 if (m_prevGradients.size () != numWeights)
280 {
281 m_prevGradients.clear ();
282 m_prevGradients.assign (weights.size (), 0);
283 }
284
285 bool success = true;
286 size_t currentRepetition = 0;
287 while (success)
288 {
289 if (currentRepetition >= m_repetitions)
290 break;
291
292 m_localGradients.assign (numWeights, 0.0);
293
294 // --- nesterov momentum ---
295 // apply momentum before computing the new gradient
296 auto itPrevG = begin (m_prevGradients);
297 auto itPrevGEnd = end (m_prevGradients);
298 auto itLocWeight = begin (m_localWeights);
299 for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
300 {
301 (*itPrevG) *= m_beta;
302 (*itLocWeight) += (*itPrevG);
303 }
304
305 E = fitnessFunction (passThrough, m_localWeights, m_localGradients);
306// plotGradients (gradients);
307// plotWeights (localWeights);
308
309 double alpha = gaussDouble (m_alpha, m_alpha/2.0);
310// double alpha = m_alpha;
311
312 auto itG = begin (m_localGradients);
313 auto itGEnd = end (m_localGradients);
314 itPrevG = begin (m_prevGradients);
315 double maxGrad = 0.0;
316 for (; itG != itGEnd; ++itG, ++itPrevG)
317 {
318 double currGrad = (*itG);
319 double prevGrad = (*itPrevG);
320 currGrad *= alpha;
321
322 //(*itPrevG) = m_beta * (prevGrad + currGrad);
323 currGrad += prevGrad;
324 (*itG) = currGrad;
325 (*itPrevG) = currGrad;
326
327 if (std::fabs (currGrad) > maxGrad)
328 maxGrad = std::fabs (currGrad);
329 }
330
331 if (maxGrad > 1)
332 {
333 m_alpha /= 2;
334 std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
335 std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
336 {
337 w /= maxGrad;
338 });
339 m_prevGradients.clear ();
340 }
341 else
342 {
343 auto itW = std::begin (weights);
344 std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
345 {
346 *itW += g;
347 ++itW;
348 });
349 }
350
351 ++currentRepetition;
352 }
353 return E;
354 }
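// --- Illustrative sketch (not part of the original file) -------------------
// The Nesterov-style update implemented above, written out for a single weight
// w with momentum term v, learning rate alpha and momentum beta (a standalone
// sketch that does not use the Steepest class; in the code above, alpha is
// additionally drawn from a Gaussian around m_alpha). The gradients produced
// by the fitness function already carry the minus sign (see update() above),
// so they are added to the weights. The toy "fitness" E(w) = 0.5*w^2 and the
// function name are hypothetical.
inline double exampleSteepestStep (double w, double& v, double alpha, double beta)
{
    v *= beta;                       // decay the previously accumulated step
    double lookAhead = w + v;        // apply momentum before evaluating the gradient
    double g = -lookAhead;           // negative gradient of E(w) = 0.5*w^2 at the look-ahead point
    v += alpha * g;                  // accumulate the new step
    return w + v;                    // updated weight
}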
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375/*! \brief sum of squares error function
376 *
377 *
378 */
379 template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
380 double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
381 {
382 double errorSum = 0.0;
383
384 // output - truth
385 ItTruth itTruth = itTruthBegin;
386 bool hasDeltas = (itDelta != itDeltaEnd);
387 for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
388 {
389// assert (itTruth != itTruthEnd);
390 double output = (*itOutput);
391 double error = output - (*itTruth);
392 if (hasDeltas)
393 {
394 (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
395 ++itDelta;
396 }
397 errorSum += error*error * patternWeight;
398 }
399
400 return 0.5*errorSum;
401 }
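// --- Illustrative usage sketch (not part of the original file) -------------
// For outputs {0.8, 0.2}, truth {1.0, 0.0} and pattern weight 1, the error is
// 0.5*((0.8-1)^2 + (0.2-0)^2) = 0.04; with InvLinear (derivative of a linear
// output node, i.e. 1) the deltas are simply output - truth = {-0.2, 0.2}.
// The function name is hypothetical.
inline double exampleSumOfSquares ()
{
    std::vector<double> output {0.8, 0.2};
    std::vector<double> truth  {1.0, 0.0};
    std::vector<double> deltas (2, 0.0);

    return sumOfSquares (begin (output), end (output),
                         begin (truth),  end (truth),
                         begin (deltas), end (deltas),
                         InvLinear, /*patternWeight*/ 1.0);   // returns 0.04
}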
402
403
404
405/*! \brief cross entropy error function
406 *
407 *
408 */
409 template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
410 double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
411 {
412 bool hasDeltas = (itDelta != itDeltaEnd);
413
414 double errorSum = 0.0;
415 for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
416 {
417 double probability = *itProbability;
418 double truth = *itTruthBegin;
419 /* truth = truth < 0.1 ? 0.1 : truth; */
420 /* truth = truth > 0.9 ? 0.9 : truth; */
421 truth = truth < 0.5 ? 0.1 : 0.9;
422 if (hasDeltas)
423 {
424 double delta = probability - truth;
425 (*itDelta) = delta*patternWeight;
426// (*itDelta) = (*itInvActFnc)(probability) * delta * patternWeight;
427 ++itDelta;
428 }
429 double error (0);
430 if (probability == 0) // protection against log (0)
431 {
432 if (truth >= 0.5)
433 error += 1.0;
434 }
435 else if (probability == 1)
436 {
437 if (truth < 0.5)
438 error += 1.0;
439 }
440 else
441 error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability)); // cross entropy function
442 errorSum += error * patternWeight;
443
444 }
445 return errorSum;
446 }
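// --- Illustrative usage sketch (not part of the original file) -------------
// Binary cross entropy for one output node: with probability 0.7 and truth 1
// (clamped to 0.9 by the function above), the error is
// -(0.9*log(0.7) + 0.1*log(0.3)), about 0.441, and the delta is 0.7 - 0.9 = -0.2.
// The inverse activation function argument is not used by crossEntropy; it is
// passed here only to satisfy the signature. The function name is hypothetical.
inline double exampleCrossEntropy ()
{
    std::vector<double> probability {0.7};
    std::vector<double> truth       {1.0};
    std::vector<double> deltas      (1, 0.0);

    return crossEntropy (begin (probability), end (probability),
                         begin (truth),       end (truth),
                         begin (deltas),      end (deltas),
                         InvSigmoid, /*patternWeight*/ 1.0);
}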
447
448
449
450
451/*! \brief soft-max cross-entropy error function (for mutually exclusive classes)
452 *
453 *
454 */
455 template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
456 double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
457 {
458 double errorSum = 0.0;
459
460 bool hasDeltas = (itDelta != itDeltaEnd);
461 // output - truth
462 ItTruth itTruth = itTruthBegin;
463 for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
464 {
465// assert (itTruth != itTruthEnd);
466 double probability = (*itProbability);
467 double truth = (*itTruth);
468 if (hasDeltas)
469 {
470 (*itDelta) = probability - truth;
471// (*itDelta) = (*itInvActFnc)(sm) * delta * patternWeight;
472 ++itDelta; //++itInvActFnc;
473 }
474 double error (0);
475
476 error += truth * log (probability);
477 errorSum += error;
478 }
479
480 return -errorSum * patternWeight;
481 }
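// --- Illustrative usage sketch (not part of the original file) -------------
// Mutually exclusive (soft-max) cross entropy for three classes: with
// probabilities {0.7, 0.2, 0.1} and one-hot truth {1, 0, 0} the error is
// -log(0.7), about 0.357, and the deltas are probability - truth =
// {-0.3, 0.2, 0.1}. The function name is hypothetical.
inline double exampleSoftMaxCrossEntropy ()
{
    std::vector<double> probabilities {0.7, 0.2, 0.1};
    std::vector<double> truth         {1.0, 0.0, 0.0};
    std::vector<double> deltas        (3, 0.0);

    return softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                begin (truth),         end (truth),
                                begin (deltas),        end (deltas),
                                InvLinear, /*patternWeight*/ 1.0);
}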
482
483
484
485
486
487
488
489
490
491/*! \brief compute the weight decay for regularization (L1 or L2)
492 *
493 *
494 */
495 template <typename ItWeight>
496 double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
497 {
498 if (eRegularization == EnumRegularization::L1)
499 {
500 // weight decay (regularization)
501 double w = 0;
502 size_t n = 0;
503 for (; itWeight != itWeightEnd; ++itWeight, ++n)
504 {
505 double weight = (*itWeight);
506 w += std::fabs (weight);
507 }
508 return error + 0.5 * w * factorWeightDecay / n;
509 }
510 else if (eRegularization == EnumRegularization::L2)
511 {
512 // weight decay (regularization)
513 double w = 0;
514 size_t n = 0;
515 for (; itWeight != itWeightEnd; ++itWeight, ++n)
516 {
517 double weight = (*itWeight);
518 w += weight*weight;
519 }
520 return error + 0.5 * w * factorWeightDecay / n;
521 }
522 else
523 return error;
524 }
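// --- Illustrative usage sketch (not part of the original file) -------------
// For weights {0.5, -0.5, 1.0} and a decay factor of 0.3, L1 adds
// 0.5 * (|0.5| + |-0.5| + |1.0|) * 0.3 / 3 = 0.1 to the error, while L2 adds
// 0.5 * (0.25 + 0.25 + 1.0) * 0.3 / 3 = 0.075. The function name is hypothetical.
inline void exampleWeightDecay ()
{
    std::vector<double> weights {0.5, -0.5, 1.0};
    double error = 1.0;
    double errL1 = weightDecay (error, begin (weights), end (weights), 0.3, EnumRegularization::L1); // 1.1
    double errL2 = weightDecay (error, begin (weights), end (weights), 0.3, EnumRegularization::L2); // 1.075
    (void) errL1; (void) errL2;
}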
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539/*! \brief apply the weights in forward direction of the DNN (the activation functions are applied separately)
540 *
541 *
542 */
543 template <typename LAYERDATA>
544 void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
545 {
546 if (prevLayerData.hasDropOut ())
547 {
548 applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
549 currLayerData.weightsBegin (),
550 currLayerData.valuesBegin (), currLayerData.valuesEnd (),
551 prevLayerData.dropOut ());
552 }
553 else
554 {
555 bool dummy = true;
556 applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
557 currLayerData.weightsBegin (),
558 currLayerData.valuesBegin (), currLayerData.valuesEnd (),
559 &dummy); // dummy to turn on all nodes (no drop out)
560 }
561 }
562
563
564
565/*! \brief backward application of the weights (back-propagation of the error)
566 *
567 *
568 */
569template <typename LAYERDATA>
570 void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
571{
572 if (prevLayerData.hasDropOut ())
573 {
574 applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
575 currLayerData.weightsBegin (),
576 prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
577 prevLayerData.dropOut ());
578 }
579 else
580 {
581 bool dummy = true;
582 applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
583 currLayerData.weightsBegin (),
584 prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
585 &dummy); // dummy to use all nodes (no drop out)
586 }
587}
588
589
590
591
592
593/*! \brief update the gradients of the weights between the previous and the current layer
594 *
595 *
596 */
597 template <typename LAYERDATA>
598 void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
599 {
600 // ! the "factorWeightDecay" has to already be scaled by 1/n, where n is the number of weights
601 if (factorWeightDecay != 0.0) // has weight regularization
602 if (regularization == EnumRegularization::L1) // L1 regularization ( sum(|w|) )
603 {
604 update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
605 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
606 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
607 currLayerData.weightsBegin (), factorWeightDecay);
608 }
609 else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) )
610 {
611 update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
612 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
613 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
614 currLayerData.weightsBegin (), factorWeightDecay);
615 }
616 else
617 {
618 update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
619 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
620 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
621 }
622
623 else
624 { // no weight regularization
625 update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
626 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
627 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
628 }
629 }
630
631
632
633
634
635
636
637
638
639
640
641
642/*! \brief compute the drop-out-weight factor
643 *
644 * when drop-out is used, a fraction of the nodes is turned off in each computation cycle;
645 * once all nodes are turned on again (for instance when the test samples are evaluated),
646 * the weights have to be adjusted to account for the different number of active nodes.
647 * This function computes that factor and applies it to the weights.
648 */
649 template <typename WeightsType, typename DropProbabilities>
650 void Net::dropOutWeightFactor (WeightsType& weights,
651 const DropProbabilities& drops,
652 bool inverse)
653 {
654 if (drops.empty () || weights.empty ())
655 return;
656
657 auto itWeight = std::begin (weights);
658 auto itWeightEnd = std::end (weights);
659 auto itDrop = std::begin (drops);
660 auto itDropEnd = std::end (drops);
661 size_t numNodesPrev = inputSize ();
662 double dropFractionPrev = *itDrop;
663 ++itDrop;
664
665 for (auto& layer : layers ())
666 {
667 if (itDrop == itDropEnd)
668 break;
669
670 size_t _numNodes = layer.numNodes ();
671
672 double dropFraction = *itDrop;
673 double pPrev = 1.0 - dropFractionPrev;
674 double p = 1.0 - dropFraction;
675 p *= pPrev;
676
677 if (inverse)
678 {
679 p = 1.0/p;
680 }
681 size_t _numWeights = layer.numWeights (numNodesPrev);
682 for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
683 {
684 if (itWeight == itWeightEnd)
685 break;
686
687 *itWeight *= p;
688 ++itWeight;
689 }
690 numNodesPrev = _numNodes;
691 dropFractionPrev = dropFraction;
692 ++itDrop;
693 }
694 }
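// --- Illustrative sketch (not part of the original file) -------------------
// The per-layer scaling applied above, written out as standalone arithmetic (a
// sketch that does not use the Net class): with a drop fraction of 0.5 on the
// previous layer and 0.2 on the current layer, each weight is scaled by
// p = (1-0.5)*(1-0.2) = 0.4 when evaluating with all nodes active, and by
// 1/p = 2.5 to undo that scaling before the next drop-out training cycle.
// All names below are hypothetical.
inline void exampleDropOutWeightFactor (std::vector<double>& layerWeights,
                                        double dropFractionPrev,
                                        double dropFraction,
                                        bool inverse)
{
    double p = (1.0 - dropFractionPrev) * (1.0 - dropFraction);
    if (inverse)
        p = 1.0 / p;
    for (double& w : layerWeights)
        w *= p;
}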
695
696
697
698
699
700
701/*! \brief execute the training until convergence is reached
702 *
703 * \param weights the container with the weights (synapses)
704 * \param trainPattern the container of training patterns
705 * \param testPattern the container of test patterns
706 * \param minimizer the minimizer (e.g. steepest gradient descent) to be used
707 * \param settings the settings for the training (e.g. multithreading or not, regularization etc.)
708 */
709 template <typename Minimizer>
710 double Net::train (std::vector<double>& weights,
711 std::vector<Pattern>& trainPattern,
712 const std::vector<Pattern>& testPattern,
713 Minimizer& minimizer,
714 Settings& settings)
715 {
716// std::cout << "START TRAINING" << std::endl;
717 settings.startTrainCycle ();
718
719 // JsMVA progress bar maximum (100%)
720 if (fIPyMaxIter) *fIPyMaxIter = 100;
721
722 settings.pads (4);
723 settings.create ("trainErrors", 100, 0, 100, 100, 0,1);
724 settings.create ("testErrors", 100, 0, 100, 100, 0,1);
725
726 size_t cycleCount = 0;
727 size_t testCycleCount = 0;
728 double testError = 1e20;
729 double trainError = 1e20;
730 size_t dropOutChangeCount = 0;
731
732 DropContainer dropContainer;
733 DropContainer dropContainerTest;
734 const std::vector<double>& dropFractions = settings.dropFractions ();
735 bool isWeightsForDrop = false;
736
737
738 // until convergence
739 do
740 {
741 ++cycleCount;
742
743 // if dropOut enabled
744 size_t dropIndex = 0;
745 if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
746 {
747 // fill the dropOut-container
748 dropContainer.clear ();
749 size_t _numNodes = inputSize ();
750 double dropFraction = 0.0;
751 dropFraction = dropFractions.at (dropIndex);
752 ++dropIndex;
753 fillDropContainer (dropContainer, dropFraction, _numNodes);
754 for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
755 {
756 auto& layer = *itLayer;
757 _numNodes = layer.numNodes ();
758 // how many nodes have to be dropped
759 dropFraction = 0.0;
760 if (dropFractions.size () > dropIndex)
761 dropFraction = dropFractions.at (dropIndex);
762
763 fillDropContainer (dropContainer, dropFraction, _numNodes);
764 }
765 isWeightsForDrop = true;
766 }
767
768 // execute training cycle
769 trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);
770
771
772 // ------ check if we have to execute a test ------------------
773 bool hasConverged = false;
774 if (testCycleCount % settings.testRepetitions () == 0) // we test only every "testRepetitions" repetitions
775 {
776 if (isWeightsForDrop)
777 {
778 dropOutWeightFactor (weights, dropFractions);
779 isWeightsForDrop = false;
780 }
781
782
783 testError = 0;
784 //double weightSum = 0;
785 settings.startTestCycle ();
786 if (settings.useMultithreading ())
787 {
788 size_t numThreads = std::thread::hardware_concurrency ();
789 size_t patternPerThread = testPattern.size () / numThreads;
790 std::vector<Batch> batches;
791 auto itPat = testPattern.begin ();
792 // auto itPatEnd = testPattern.end ();
793 for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
794 {
795 batches.push_back (Batch (itPat, itPat + patternPerThread));
796 itPat += patternPerThread;
797 }
798 if (itPat != testPattern.end ())
799 batches.push_back (Batch (itPat, testPattern.end ()));
800
801 std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
802 for (auto& batch : batches)
803 {
804 // -------------------- execute each of the batch ranges on a different thread -------------------------------
805 futures.push_back (
806 std::async (std::launch::async, [&]()
807 {
808 std::vector<double> localOutput;
809 pass_through_type passThrough (settings, batch, dropContainerTest);
810 double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
811 return std::make_tuple (testBatchError, localOutput);
812 })
813 );
814 }
815
816 auto itBatch = batches.begin ();
817 for (auto& f : futures)
818 {
819 std::tuple<double,std::vector<double>> result = f.get ();
820 testError += std::get<0>(result) / batches.size ();
821 std::vector<double> output = std::get<1>(result);
822 if (output.size() == (outputSize() - 1) * itBatch->size())
823 {
824 auto output_iterator = output.begin();
825 for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
826 {
827 for (size_t output_index = 1; output_index < outputSize(); ++output_index)
828 {
829 settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
830 (*pattern_it).weight ());
831 ++output_iterator;
832 }
833 }
834 }
835 ++itBatch;
836 }
837
838 }
839 else
840 {
841 std::vector<double> output;
842 //for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it)
843 {
844 //const Pattern& p = (*it);
845 //double weight = p.weight ();
846 //Batch batch (it, it+1);
847 Batch batch (begin (testPattern), end (testPattern));
848 output.clear ();
849 pass_through_type passThrough (settings, batch, dropContainerTest);
850 double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output);
851 if (output.size() == (outputSize() - 1) * batch.size())
852 {
853 auto output_iterator = output.begin();
854 for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
855 {
856 for (size_t output_index = 1; output_index < outputSize(); ++output_index)
857 {
858 settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
859 (*pattern_it).weight ());
860 ++output_iterator;
861 }
862 }
863 }
864 testError += testPatternError; /// batch.size ();
865 }
866 // testError /= testPattern.size ();
867 }
868 settings.endTestCycle ();
869// testError /= weightSum;
870
871 settings.computeResult (*this, weights);
872
873 hasConverged = settings.hasConverged (testError);
874 if (!hasConverged && !isWeightsForDrop)
875 {
876 dropOutWeightFactor (weights, dropFractions, true); // inverse
877 isWeightsForDrop = true;
878 }
879 }
880 ++testCycleCount;
881 ++dropOutChangeCount;
882
883
884 static double x = -1.0;
885 x += 1.0;
886// settings.resetPlot ("errors");
887 settings.addPoint ("trainErrors", cycleCount, trainError);
888 settings.addPoint ("testErrors", cycleCount, testError);
889 settings.plot ("trainErrors", "C", 1, kBlue);
890 settings.plot ("testErrors", "C", 1, kMagenta);
891
892
893 // setup error plots and progress bar variables for JsMVA
894 if (fInteractive){
895 fInteractive->AddPoint(cycleCount, trainError, testError);
896 if (*fExitFromTraining) break;
897 *fIPyCurrentIter = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
898 }
899
900 if (hasConverged)
901 break;
902
903 if ((int)cycleCount % 10 == 0) {
904
905 TString convText = Form( "(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
906 trainError,
907 testError,
908 (int)cycleCount,
909 (int)settings.convergenceCount (),
910 (int)settings.maxConvergenceCount ());
911 double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
912 settings.cycle (progress, convText);
913 }
914 }
915 while (true);
916 settings.endTrainCycle (trainError);
917
918 TString convText = Form( "(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
919 double progress = 100*(double)settings.maxConvergenceCount() /(double)settings.convergenceSteps ();
920 settings.cycle (progress, convText);
921
922 return testError;
923 }
924
925
926
927/*! \brief execute a single training cycle
928 *
929 * uses multithreading if turned on
930 *
931 * \param minimizer the minimizer to be used (e.g. SGD)
932 * \param weights the weight container with all the synapse weights
933 * \param itPatternBegin begin of the pattern container
934 * \param itPatternEnd the end of the pattern container
935 * \param settings the settings for this training (e.g. multithreading or not, regularization, etc.)
936 * \param dropContainer the data for dropping-out nodes (regularization technique)
937 */
938 template <typename Iterator, typename Minimizer>
939 inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
940 Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)
941 {
942 double error = 0.0;
943 size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
944 size_t numBatches = numPattern/settings.batchSize ();
945 size_t numBatches_stored = numBatches;
946
947 std::shuffle(itPatternBegin, itPatternEnd, std::default_random_engine{});
948 Iterator itPatternBatchBegin = itPatternBegin;
949 Iterator itPatternBatchEnd = itPatternBatchBegin;
950
951 // create batches
952 std::vector<Batch> batches;
953 while (numBatches > 0)
954 {
955 std::advance (itPatternBatchEnd, settings.batchSize ());
956 batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
957 itPatternBatchBegin = itPatternBatchEnd;
958 --numBatches;
959 }
960
961 // add the last pattern to the last batch
962 if (itPatternBatchEnd != itPatternEnd)
963 batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));
964
965
966 ///< turn on multithreading if requested
967 if (settings.useMultithreading ())
968 {
969 // -------------------- divide the batches into bunches for each thread --------------
970 size_t numThreads = std::thread::hardware_concurrency ();
971 size_t batchesPerThread = batches.size () / numThreads;
972 typedef std::vector<Batch>::iterator batch_iterator;
973 std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
974 batch_iterator itBatchBegin = std::begin (batches);
975 batch_iterator itBatchCurrEnd = std::begin (batches);
976 batch_iterator itBatchEnd = std::end (batches);
977 for (size_t iT = 0; iT < numThreads; ++iT)
978 {
979 if (iT == numThreads-1)
980 itBatchCurrEnd = itBatchEnd;
981 else
982 std::advance (itBatchCurrEnd, batchesPerThread);
983 batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
984 itBatchBegin = itBatchCurrEnd;
985 }
986
987 // -------------------- loop over batches -------------------------------------------
988 std::vector<std::future<double>> futures;
989 for (auto& batchRange : batchVec)
990 {
991 // -------------------- execute each of the batch ranges on a different thread -------------------------------
992 futures.push_back (
993 std::async (std::launch::async, [&]()
994 {
995 double localError = 0.0;
996 for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
997 {
998 Batch& batch = *it;
999 pass_through_type settingsAndBatch (settings, batch, dropContainer);
1000 Minimizer minimizerClone (minimizer);
1001 localError += minimizerClone ((*this), weights, settingsAndBatch); /// call the minimizer
1002 }
1003 return localError;
1004 })
1005 );
1006 }
1007
1008 for (auto& f : futures)
1009 error += f.get ();
1010 }
1011 else
1012 {
1013 for (auto& batch : batches)
1014 {
1015 std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
1016 error += minimizer ((*this), weights, settingsAndBatch);
1017 }
1018 }
1019
1020 numBatches_stored = std::max (numBatches_stored, size_t(1)); /// normalize the error
1021 error /= numBatches_stored;
1022 settings.testIteration ();
1023
1024 return error;
1025 }
1026
1027
1028
1029
1030
1031/*! \brief compute the neural net
1032 *
1033 * \param input the input data
1034 * \param weights the weight data
1035 */
1036 template <typename Weights>
1037 std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
1038 {
1039 std::vector<LayerData> layerData;
1040 layerData.reserve (m_layers.size ()+1);
1041 auto itWeight = begin (weights);
1042 auto itInputBegin = begin (input);
1043 auto itInputEnd = end (input);
1044 layerData.push_back (LayerData (itInputBegin, itInputEnd));
1045 size_t numNodesPrev = input.size ();
1046
1047 // -------------------- prepare layer data with one pattern -------------------------------
1048 for (auto& layer: m_layers)
1049 {
1050 layerData.push_back (LayerData (layer.numNodes (), itWeight,
1051 layer.activationFunction (),
1052 layer.modeOutputValues ()));
1053 size_t _numWeights = layer.numWeights (numNodesPrev);
1054 itWeight += _numWeights;
1055 numNodesPrev = layer.numNodes ();
1056 }
1057
1058
1059 // --------- forward -------------
1060 forwardPattern (m_layers, layerData);
1061
1062 // ------------- fetch output ------------------
1063 std::vector<double> output;
1064 fetchOutput (layerData.back (), output);
1065 return output;
1066 }
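// --- Illustrative usage sketch (not part of the original file) -------------
// A minimal sketch of building a small net and evaluating it with compute().
// It only uses calls that appear elsewhere in this file (setInputSize,
// setOutputSize, addLayer, initializeWeights, compute); the layer sizes and
// the TANH activation of the hidden layer are assumptions made purely for
// illustration, and the function name itself is hypothetical.
inline std::vector<double> exampleCompute ()
{
    Net net;
    net.setInputSize (2);
    net.setOutputSize (1);
    net.addLayer (Layer (4, EnumFunction::TANH));                             // hidden layer (assumed)
    net.addLayer (Layer (1, EnumFunction::LINEAR, ModeOutputValues::DIRECT)); // output layer

    std::vector<double> weights;
    net.initializeWeights (WeightInitializationStrategy::XAVIER,
                           std::back_inserter (weights));

    std::vector<double> input {0.3, -1.2};
    return net.compute (input, weights);   // one response value for this net
}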
1067
1068
1069 template <typename Weights, typename PassThrough>
1070 double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
1071 {
1072 std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1073 assert (numWeights () == weights.size ());
1074 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false);
1075 return error;
1076 }
1077
1078 template <typename Weights, typename PassThrough, typename OutContainer>
1079 double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
1080 {
1081 std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1082 assert (numWeights () == weights.size ());
1083 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
1084 return error;
1085 }
1086
1087
1088 template <typename Weights, typename Gradients, typename PassThrough>
1089 double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
1090 {
1091 std::vector<double> nothing;
1092 assert (numWeights () == weights.size ());
1093 assert (weights.size () == gradients.size ());
1094 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
1095 return error;
1096 }
1097
1098 template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1099 double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
1100 {
1101 MATH_UNUSED(eFetch);
1102 assert (numWeights () == weights.size ());
1103 assert (weights.size () == gradients.size ());
1104 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
1105 return error;
1106 }
1107
1108
1109
1110 template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1111 std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
1112 Batch& batch,
1113 const DropContainer& dropContainer,
1114 ItWeight itWeightBegin,
1115 ItWeight /*itWeightEnd*/,
1116 ItGradient itGradientBegin,
1117 ItGradient itGradientEnd,
1118 size_t& totalNumWeights) const
1119 {
1120 LayerData::const_dropout_iterator itDropOut;
1121 bool usesDropOut = !dropContainer.empty ();
1122 if (usesDropOut)
1123 itDropOut = std::begin (dropContainer);
1124
1125 if (_layers.empty ())
1126 throw std::string ("no layers in this net");
1127
1128
1129 // ----------- create layer data -------------------------------------------------------
1130 //LM- This assert not needed anymore (outputsize is actually numNodes+1)
1131 //assert (_layers.back ().numNodes () == outputSize ());
1132 totalNumWeights = 0;
1133 size_t totalNumNodes = 0;
1134 std::vector<std::vector<LayerData>> layerPatternData;
1135 layerPatternData.reserve (_layers.size ()+1);
1136 ItWeight itWeight = itWeightBegin;
1137 ItGradient itGradient = itGradientBegin;
1138 size_t numNodesPrev = inputSize ();
1139 typename Pattern::const_iterator itInputBegin;
1140 typename Pattern::const_iterator itInputEnd;
1141
1142 // ItWeight itGammaBegin = itWeightBegin + numWeights ();
1143 // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes ();
1144 // ItGradient itGradGammaBegin = itGradientBegin + numWeights ();
1145 // ItGradient itGradBetaBegin = itGradientBegin + numWeights () + numNodes ();
1146
1147
1148 // --------------------- prepare layer data for input layer ----------------------------
1149 layerPatternData.push_back (std::vector<LayerData>());
1150 for (const Pattern& _pattern : batch)
1151 {
1152 std::vector<LayerData>& layerData = layerPatternData.back ();
1153 layerData.push_back (LayerData (numNodesPrev));
1154
1155 itInputBegin = _pattern.beginInput ();
1156 itInputEnd = _pattern.endInput ();
1157 layerData.back ().setInput (itInputBegin, itInputEnd);
1158
1159 if (usesDropOut)
1160 layerData.back ().setDropOut (itDropOut);
1161
1162 }
1163
1164
1165 if (usesDropOut)
1166 itDropOut += _layers.back ().numNodes ();
1167
1168 // ---------------- prepare subsequent layers ---------------------------------------------
1169 // for each of the layers
1170 for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
1171 {
1172 bool isOutputLayer = (itLayer+1 == itLayerEnd);
1173 bool isFirstHiddenLayer = (itLayer == begin (_layers));
1174
1175 auto& layer = *itLayer;
1176 layerPatternData.push_back (std::vector<LayerData>());
1177 // for each pattern, prepare a layerData
1178 for (const Pattern& _pattern : batch)
1179 {
1180 std::vector<LayerData>& layerData = layerPatternData.back ();
1181 //layerData.push_back (LayerData (numNodesPrev));
1182
1183 if (itGradientBegin == itGradientEnd)
1184 {
1185 layerData.push_back (LayerData (layer.numNodes (), itWeight,
1186 layer.activationFunction (),
1187 layer.modeOutputValues ()));
1188 }
1189 else
1190 {
1191 layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
1192 layer.activationFunction (),
1193 layer.inverseActivationFunction (),
1194 layer.modeOutputValues ()));
1195 }
1196
1197 if (usesDropOut)
1198 {
1199 layerData.back ().setDropOut (itDropOut);
1200 }
1201
1202 }
1203
1204 if (usesDropOut)
1205 {
1206 itDropOut += layer.numNodes ();
1207 }
1208 size_t _numWeights = layer.numWeights (numNodesPrev);
1209 totalNumWeights += _numWeights;
1210 itWeight += _numWeights;
1211 itGradient += _numWeights;
1212 numNodesPrev = layer.numNodes ();
1213 totalNumNodes += numNodesPrev;
1214
1215 }
1216 assert (totalNumWeights > 0);
1217 return layerPatternData;
1218}
1219
1220
1221
1222 template <typename LayerContainer>
1223 void Net::forwardPattern (const LayerContainer& _layers,
1224 std::vector<LayerData>& layerData) const
1225 {
1226 size_t idxLayer = 0, idxLayerEnd = _layers.size ();
1227 size_t cumulativeNodeCount = 0;
1228 for (; idxLayer < idxLayerEnd; ++idxLayer)
1229 {
1230 LayerData& prevLayerData = layerData.at (idxLayer);
1231 LayerData& currLayerData = layerData.at (idxLayer+1);
1232
1233 forward (prevLayerData, currLayerData);
1234
1235 applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1236 }
1237 }
1238
1239
1240
1241
1242 template <typename LayerContainer, typename LayerPatternContainer>
1243 void Net::forwardBatch (const LayerContainer& _layers,
1244 LayerPatternContainer& layerPatternData,
1245 std::vector<double>& valuesMean,
1246 std::vector<double>& valuesStdDev,
1247 size_t trainFromLayer) const
1248 {
1249 valuesMean.clear ();
1250 valuesStdDev.clear ();
1251
1252 // ---------------------------------- loop over layers and pattern -------------------------------------------------------
1253 size_t cumulativeNodeCount = 0;
1254 for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
1255 {
1256 bool doTraining = idxLayer >= trainFromLayer;
1257
1258 // get layer-pattern data for this and the corresponding one from the next layer
1259 std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
1260 std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);
1261
1262 size_t numPattern = prevLayerPatternData.size ();
1263 size_t numNodesLayer = _layers.at (idxLayer).numNodes ();
1264
1265 std::vector<MeanVariance> means (numNodesLayer);
1266 // ---------------- loop over layerDatas of pattern compute forward ----------------------------
1267 for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1268 {
1269 const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1270 LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1271
1272
1273 forward (prevLayerData, currLayerData); // feed forward
1274 }
1275
1276 // ---------------- loop over layerDatas of pattern apply non-linearities ----------------------------
1277 for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1278 {
1279 //const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1280 LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1281
1282 if (doTraining)
1283 applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
1284 currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
1285 else
1286 applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1287 }
1288
1289 // accumulate node count
1290 cumulativeNodeCount += numNodesLayer;
1291 }
1292}
1293
1294
1295
1296
1297 template <typename OutputContainer>
1298 void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
1299 {
1300 ModeOutputValues eModeOutput = lastLayerData.outputMode ();
1301 if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
1302 {
1303 outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
1304 }
1305 else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) ||
1306 isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput))
1307 {
1308 const auto& prob = lastLayerData.probabilities ();
1309 outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ()) ;
1310 }
1311 else
1312 assert (false);
1313 }
1314
1315
1316
1317
1318 template <typename OutputContainer>
1319 void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
1320 {
1321 for (const LayerData& lastLayerData : lastLayerPatternData)
1322 fetchOutput (lastLayerData, outputContainer);
1323 }
1324
1325
1326
1327 template <typename ItWeight>
1328 std::tuple</*sumError*/double,/*sumWeights*/double> Net::computeError (const Settings& settings,
1329 std::vector<LayerData>& lastLayerData,
1330 Batch& batch,
1331 ItWeight itWeightBegin,
1332 ItWeight itWeightEnd) const
1333 {
1334 typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();
1335// typename std::vector<LayerData>::iterator itLayerDataEnd = lastLayerData.end ();
1336
1337 typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
1338 typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();
1339
1340 double sumWeights (0.0);
1341 double sumError (0.0);
1342
1343 size_t idxPattern = 0;
1344// FIXME: check that iteration doesn't go beyond itLayerDataEnd!
1345 for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
1346 {
1347 ++idxPattern;
1348
1349 // compute E and the deltas of the computed output and the true output
1350 LayerData& layerData = (*itLayerData);
1351 const Pattern& _pattern = (*itPattern);
1352 double error = errorFunction (layerData, _pattern.output (),
1353 itWeightBegin, itWeightEnd,
1354 _pattern.weight (), settings.factorWeightDecay (),
1355 settings.regularization ());
1356 sumWeights += fabs (_pattern.weight ());
1357 sumError += error;
1358 }
1359 return std::make_tuple (sumError, sumWeights);
1360 }
1361
1362
1363
1364 template <typename Settings>
1365 void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
1366 const Settings& settings,
1367 size_t trainFromLayer,
1368 size_t totalNumWeights) const
1369 {
1370 bool doTraining = layerPatternData.size () > trainFromLayer;
1371 if (doTraining) // training
1372 {
1373 // ------------- backpropagation -------------
1374 size_t idxLayer = layerPatternData.size ();
1375 for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
1376 itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
1377 {
1378 --idxLayer;
1379 if (idxLayer <= trainFromLayer) // no training
1380 break;
1381
1382 std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
1383 std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);
1384
1385 size_t idxPattern = 0;
1386// FIXME: check that itPrevLayerData doesn't go beyond itPrevLayerDataEnd!
1387 for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
1388 itPrevLayerData = begin (prevLayerDataColl) /*, itPrevLayerDataEnd = end (prevLayerDataColl)*/;
1389 itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
1390 {
1391 LayerData& currLayerData = (*itCurrLayerData);
1392 LayerData& prevLayerData = *(itPrevLayerData);
1393
1394 backward (prevLayerData, currLayerData);
1395
1396 // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses)
1397 // because L1 and L2 regularization
1398 //
1399 // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization
1400 //
1401 // L1 : -factorWeightDecay*sgn(w)/numWeights
1402 // L2 : -factorWeightDecay/numWeights
1403 update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
1404 }
1405 }
1406 }
1407 }
1408
1409
1410
1411/*! \brief forward propagation and backward propagation
1412 *
1413 *
1414 */
1415 template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
1416 double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
1417 ItWeight itWeightBegin, ItWeight itWeightEnd,
1418 ItGradient itGradientBegin, ItGradient itGradientEnd,
1419 size_t trainFromLayer,
1420 OutContainer& outputContainer, bool doFetchOutput) const
1421 {
1422 Settings& settings = std::get<0>(settingsAndBatch);
1423 Batch& batch = std::get<1>(settingsAndBatch);
1424 DropContainer& dropContainer = std::get<2>(settingsAndBatch);
1425
1426 double sumError = 0.0;
1427 double sumWeights = 0.0; // -------------
1428
1429
1430 // ----------------------------- prepare layer data -------------------------------------
1431 size_t totalNumWeights (0);
1432 std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
1433 batch,
1434 dropContainer,
1435 itWeightBegin,
1436 itWeightEnd,
1437 itGradientBegin,
1438 itGradientEnd,
1439 totalNumWeights);
1440
1441
1442
1443 // ---------------------------------- propagate forward ------------------------------------------------------------------
1444 std::vector<double> valuesMean;
1445 std::vector<double> valuesStdDev;
1446 forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);
1447
1448
1449 // ------------- fetch output ------------------
1450 if (doFetchOutput)
1451 {
1452 fetchOutput (layerPatternData.back (), outputContainer);
1453 }
1454
1455
1456 // ------------- error computation -------------
1457 std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);
1458
1459
1460 // ------------- backpropagation -------------
1461 backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);
1462
1463
1464 // --- compile the measures
1465 double batchSize = std::distance (std::begin (batch), std::end (batch));
1466 for (auto it = itGradientBegin; it != itGradientEnd; ++it)
1467 (*it) /= batchSize;
1468
1469
1470 sumError /= sumWeights;
1471 return sumError;
1472 }
1473
1474
1475
1476/*! \brief initialization of the weights
1477 *
1478 *
1479 */
1480 template <typename OutIterator>
1481 void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
1482 {
1483 if (eInitStrategy == WeightInitializationStrategy::XAVIER)
1484 {
1485 // input and output properties
1486 int numInput = inputSize ();
1487
1488 // compute variance and mean of input and output
1489 //...
1490
1491
1492 // compute the weights
1493 for (auto& layer: layers ())
1494 {
1495 double nIn = numInput;
1496 double stdDev = sqrt (2.0/nIn);
1497 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1498 {
1499 (*itWeight) = DNN::gaussDouble (0.0, stdDev); // factor 2.0 for ReLU
1500 ++itWeight;
1501 }
1502 numInput = layer.numNodes ();
1503 }
1504 return;
1505 }
1506
1507 if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
1508 {
1509 // input and output properties
1510 int numInput = inputSize ();
1511
1512 // compute variance and mean of input and output
1513 //...
1514
1515
1516 // compute the weights
1517 for (auto& layer: layers ())
1518 {
1519 double nIn = numInput;
1520 double minVal = -sqrt(2.0/nIn);
1521 double maxVal = sqrt (2.0/nIn);
1522 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1523 {
1524
1525 (*itWeight) = DNN::uniformDouble (minVal, maxVal); // factor 2.0 for ReLU
1526 ++itWeight;
1527 }
1528 numInput = layer.numNodes ();
1529 }
1530 return;
1531 }
1532
1533 if (eInitStrategy == WeightInitializationStrategy::TEST)
1534 {
1535 // input and output properties
1536 int numInput = inputSize ();
1537
1538 // compute variance and mean of input and output
1539 //...
1540
1541
1542 // compute the weights
1543 for (auto& layer: layers ())
1544 {
1545// double nIn = numInput;
1546 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1547 {
1548 (*itWeight) = DNN::gaussDouble (0.0, 0.1);
1549 ++itWeight;
1550 }
1551 numInput = layer.numNodes ();
1552 }
1553 return;
1554 }
1555
1556 if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
1557 {
1558 // input and output properties
1559 int numInput = inputSize ();
1560
1561 // compute variance and mean of input and output
1562 //...
1563
1564
1565 // compute the weights
1566 for (auto& layer: layers ())
1567 {
1568 double nIn = numInput;
1569 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1570 {
1571 (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn))); // factor 2.0 for ReLU
1572 ++itWeight;
1573 }
1574 numInput = layer.numNodes ();
1575 }
1576 return;
1577 }
1578
1579 }
1580
1581
1582
1583
1584
1585/*! \brief compute the error function
1586 *
1587 *
1588 */
1589 template <typename Container, typename ItWeight>
1590 double Net::errorFunction (LayerData& layerData,
1591 Container truth,
1592 ItWeight itWeight,
1593 ItWeight itWeightEnd,
1594 double patternWeight,
1595 double factorWeightDecay,
1596 EnumRegularization eRegularization) const
1597 {
1598 double error (0);
1599 switch (m_eErrorFunction)
1600 {
1601 case ModeErrorFunction::SUMOFSQUARES:
1602 {
1603 error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth),
1604 layerData.deltasBegin (), layerData.deltasEnd (),
1605 layerData.inverseActivationFunction (),
1606 patternWeight);
1607 break;
1608 }
1609 case ModeErrorFunction::CROSSENTROPY:
1610 {
1612 std::vector<double> probabilities = layerData.probabilities ();
1613 error = crossEntropy (begin (probabilities), end (probabilities),
1614 begin (truth), end (truth),
1615 layerData.deltasBegin (), layerData.deltasEnd (),
1616 layerData.inverseActivationFunction (),
1617 patternWeight);
1618 break;
1619 }
1620 case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
1621 {
1622 std::cout << "softmax." << std::endl;
1624 std::vector<double> probabilities = layerData.probabilities ();
1625 error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
1626 begin (truth), end (truth),
1627 layerData.deltasBegin (), layerData.deltasEnd (),
1628 layerData.inverseActivationFunction (),
1629 patternWeight);
1630 break;
1631 }
1632 }
1633 if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
1634 {
1635 error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
1636 }
1637 return error;
1638 }
1639
1640
1641
1642
1643
1644
1645
1646// /*! \brief pre-training
1647// *
1648// * in development
1649// */
1650// template <typename Minimizer>
1651// void Net::preTrain (std::vector<double>& weights,
1652// std::vector<Pattern>& trainPattern,
1653// const std::vector<Pattern>& testPattern,
1654// Minimizer& minimizer, Settings& settings)
1655// {
1656// auto itWeightGeneral = std::begin (weights);
1657// std::vector<Pattern> prePatternTrain (trainPattern.size ());
1658// std::vector<Pattern> prePatternTest (testPattern.size ());
1659
1660// size_t _inputSize = inputSize ();
1661
1662// // transform pattern using the created preNet
1663// auto initializePrePattern = [&](const std::vector<Pattern>& pttrnInput, std::vector<Pattern>& pttrnOutput)
1664// {
1665// pttrnOutput.clear ();
1666// std::transform (std::begin (pttrnInput), std::end (pttrnInput),
1667// std::back_inserter (pttrnOutput),
1668// [](const Pattern& p)
1669// {
1670// Pattern pat (p.input (), p.input (), p.weight ());
1671// return pat;
1672// });
1673// };
1674
1675// initializePrePattern (trainPattern, prePatternTrain);
1676// initializePrePattern (testPattern, prePatternTest);
1677
1678// std::vector<double> originalDropFractions = settings.dropFractions ();
1679
1680// for (auto& _layer : layers ())
1681// {
1682// // compute number of weights (as a function of the number of incoming nodes)
1683// // fetch number of nodes
1684// size_t numNodes = _layer.numNodes ();
1685// size_t _numWeights = _layer.numWeights (_inputSize);
1686
1687// // ------------------
1688// DNN::Net preNet;
1689// if (!originalDropFractions.empty ())
1690// {
1691// originalDropFractions.erase (originalDropFractions.begin ());
1692// settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ());
1693// }
1694// std::vector<double> preWeights;
1695
1696// // define the preNet (pretraining-net) for this layer
1697// // outputSize == inputSize, because this is an autoencoder;
1698// preNet.setInputSize (_inputSize);
1699// preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ()));
1700// preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT));
1701// preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES);
1702// preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder)
1703
1704// // initialize weights
1705// preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM,
1706// std::back_inserter (preWeights));
1707
1708// // overwrite already existing weights from the "general" weights
1709// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ());
1710// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer
1711
1712
1713// // train the "preNet"
1714// preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings);
1715
1716// // fetch the pre-trained weights (without the output part of the autoencoder)
1717// std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral);
1718
1719// // advance the iterator on the incoming weights
1720// itWeightGeneral += _numWeights;
1721
1722// // remove the weights of the output layer of the preNet
1723// preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ());
1724
1725// // remove the outputLayer of the preNet
1726// preNet.removeLayer ();
1727
1728// // set the output size to the number of nodes in the new output layer (== last hidden layer)
1729// preNet.setOutputSize (numNodes);
1730
1731// // transform pattern using the created preNet
1732// auto proceedPattern = [&](std::vector<Pattern>& pttrn)
1733// {
1734// std::vector<Pattern> newPttrn;
1735// std::for_each (std::begin (pttrn), std::end (pttrn),
1736// [&preNet,&preWeights,&newPttrn](Pattern& p)
1737// {
1738// std::vector<double> output = preNet.compute (p.input (), preWeights);
1739// Pattern pat (output, output, p.weight ());
1740// newPttrn.push_back (pat);
1741// // p = pat;
1742// });
1743// return newPttrn;
1744// };
1745
1746
1747// prePatternTrain = proceedPattern (prePatternTrain);
1748// prePatternTest = proceedPattern (prePatternTest);
1749
1750
1751// // the new input size is the output size of the already reduced preNet
1752// _inputSize = preNet.layers ().back ().numNodes ();
1753// }
1754// }
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771 } // namespace DNN
1772} // namespace TMVA
1773
1774#endif
Definition: NeuralNet.icc:1590
double forward_backward(LayerContainer &layers, PassThrough &settingsAndBatch, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t trainFromLayer, OutContainer &outputContainer, bool fetchOutput) const
main NN computation function
Definition: NeuralNet.icc:1416
double trainCycle(Minimizer &minimizer, std::vector< double > &weights, Iterator itPatternBegin, Iterator itPatternEnd, Settings &settings, DropContainer &dropContainer)
executes one training cycle
Definition: NeuralNet.icc:939
UInt_t * fIPyCurrentIter
Definition: NeuralNet.h:1285
double operator()(PassThrough &settingsAndBatch, const Weights &weights) const
execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradient...
Definition: NeuralNet.icc:1070
void dropOutWeightFactor(WeightsType &weights, const DropProbabilities &drops, bool inverse=false)
set the drop out configuration
Definition: NeuralNet.icc:650
void fillDropContainer(DropContainer &dropContainer, double dropFraction, size_t numNodes) const
prepare the drop-out-container (select the nodes which are to be dropped out)
Definition: NeuralNet.cxx:575
size_t numWeights(size_t trainingStartLayer=0) const
returns the number of weights in this net
Definition: NeuralNet.cxx:543
IPythonInteractive * fInteractive
Definition: NeuralNet.h:1283
std::tuple< double, double > computeError(const Settings &settings, std::vector< LayerData > &lastLayerData, Batch &batch, ItWeight itWeightBegin, ItWeight itWeightEnd) const
Definition: NeuralNet.icc:1328
void forwardPattern(const LayerContainer &_layers, std::vector< LayerData > &layerData) const
Definition: NeuralNet.icc:1223
void backPropagate(std::vector< std::vector< LayerData > > &layerPatternData, const Settings &settings, size_t trainFromLayer, size_t totalNumWeights) const
Definition: NeuralNet.icc:1365
Settings for the training of the neural net.
Definition: NeuralNet.h:737
bool useMultithreading() const
is multithreading turned on?
Definition: NeuralNet.h:822
EnumRegularization regularization() const
some regularization of the DNN is turned on?
Definition: NeuralNet.h:820
size_t convergenceCount() const
returns the current convergence count
Definition: NeuralNet.h:834
size_t testRepetitions() const
how often is the test data tested
Definition: NeuralNet.h:775
virtual void endTestCycle()
callback for monitoring and loggging
Definition: NeuralNet.h:812
virtual void testIteration()
callback for monitoring and loggging
Definition: NeuralNet.h:813
virtual bool hasConverged(double testError)
has this training converged already?
Definition: NeuralNet.cxx:488
virtual void cycle(double progress, TString text)
Definition: NeuralNet.h:806
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition: NeuralNet.h:795
const std::vector< double > & dropFractions() const
Definition: NeuralNet.h:769
void addPoint(std::string histoName, double x)
for monitoring
Definition: NeuralNet.h:828
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition: NeuralNet.h:788
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition: NeuralNet.h:830
virtual void startTrainCycle()
Definition: NeuralNet.h:789
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition: NeuralNet.h:773
double factorWeightDecay() const
get the weight-decay factor
Definition: NeuralNet.h:776
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition: NeuralNet.h:835
void pads(int numPads)
preparation for monitoring
Definition: NeuralNet.h:825
size_t batchSize() const
mini-batch size
Definition: NeuralNet.h:774
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and loggging
Definition: NeuralNet.h:816
size_t dropRepetitions() const
Definition: NeuralNet.h:768
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition: NeuralNet.h:826
virtual void startTestCycle()
callback for monitoring and loggging
Definition: NeuralNet.h:811
double m_beta
internal parameter (momentum)
Definition: NeuralNet.h:373
std::vector< double > m_localGradients
local gradients for reuse in thread.
Definition: NeuralNet.h:377
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition: NeuralNet.h:374
double m_alpha
internal parameter (learningRate)
Definition: NeuralNet.h:372
std::vector< double > m_localWeights
local weights for reuse in thread.
Definition: NeuralNet.h:376
double operator()(Function &fitnessFunction, Weights &weights, PassThrough &passThrough)
operator to call the steepest gradient descent algorithm
Definition: NeuralNet.icc:269
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Definition: MethodBase.cxx:212
Basic string class.
Definition: TString.h:131
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
double T(double x)
Definition: ChebyshevPol.h:34
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:151
RooCmdArg Minimizer(const char *type, const char *alg=0)
std::shared_ptr< std::function< double(double)> > InvGauss
Definition: NeuralNet.cxx:14
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
double uniformDouble(double minValue, double maxValue)
Definition: NeuralNet.cxx:43
std::shared_ptr< std::function< double(double)> > SymmReLU
Definition: NeuralNet.cxx:30
std::shared_ptr< std::function< double(double)> > TanhShift
Definition: NeuralNet.cxx:31
std::shared_ptr< std::function< double(double)> > Tanh
Definition: NeuralNet.cxx:29
std::shared_ptr< std::function< double(double)> > InvSigmoid
Definition: NeuralNet.cxx:18
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
Definition: NeuralNet.icc:544
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
T uniformFromTo(T from, T to)
Definition: NeuralNet.icc:32
std::shared_ptr< std::function< double(double)> > SoftPlus
Definition: NeuralNet.cxx:27
EnumRegularization
Definition: NeuralNet.h:174
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:410
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
Definition: NeuralNet.icc:570
std::shared_ptr< std::function< double(double)> > ZeroFnc
Definition: NeuralNet.cxx:28
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496
std::shared_ptr< std::function< double(double)> > InvSoftSign
Definition: NeuralNet.cxx:20
std::shared_ptr< std::function< double(double)> > InvGaussComplement
Definition: NeuralNet.cxx:15
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:216
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max-cross-entropy error function (for mutual exclusive cross-entropy)
Definition: NeuralNet.icc:456
std::shared_ptr< std::function< double(double)> > InvTanh
Definition: NeuralNet.cxx:22
std::shared_ptr< std::function< double(double)> > Linear
Definition: NeuralNet.cxx:24
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition: NeuralNet.h:1057
std::shared_ptr< std::function< double(double)> > InvReLU
Definition: NeuralNet.cxx:17
std::shared_ptr< std::function< double(double)> > GaussComplement
Definition: NeuralNet.cxx:13
std::shared_ptr< std::function< double(double)> > Gauss
Definition: NeuralNet.cxx:12
std::shared_ptr< std::function< double(double)> > Sigmoid
Definition: NeuralNet.cxx:26
double gaussDouble(double mean, double sigma)
Definition: NeuralNet.cxx:35
ModeOutputValues
Definition: NeuralNet.h:180
std::shared_ptr< std::function< double(double)> > SoftSign
Definition: NeuralNet.cxx:32
std::shared_ptr< std::function< double(double)> > InvSoftPlus
Definition: NeuralNet.cxx:19
std::shared_ptr< std::function< double(double)> > ReLU
Definition: NeuralNet.cxx:25
double computeRegularization(double weight, const double &factorWeightDecay)
compute the regularization (L1, L2)
Definition: NeuralNet.icc:207
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition: NeuralNet.h:1301
bool isFlagSet(T flag, T value)
Definition: NeuralNet.h:213
std::shared_ptr< std::function< double(double)> > InvTanhShift
Definition: NeuralNet.cxx:23
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
Definition: NeuralNet.icc:181
std::vector< char > DropContainer
Definition: NeuralNet.h:220
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)
std::shared_ptr< std::function< double(double)> > InvSymmReLU
Definition: NeuralNet.cxx:21
std::shared_ptr< std::function< double(double)> > InvLinear
Definition: NeuralNet.cxx:16
create variable transformations
constexpr Double_t E()
Base of natural log:
Definition: TMath.h:97
static void output(int code)
Definition: gifencode.c:226