Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
NeuralNet.icc
Go to the documentation of this file.
1#ifndef TMVA_NEURAL_NET_I
2#define TMVA_NEURAL_NET_I
3
4#ifndef TMVA_NEURAL_NET
5#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
6#endif // TMVA_NEURAL_NET
7#pragma once
8#ifndef _MSC_VER
9#pragma GCC diagnostic ignored "-Wunused-variable"
10#endif
11
12#include "Math/Util.h"
13
14#include "TMVA/Pattern.h"
15#include "TMVA/MethodBase.h"
16
17#include <tuple>
18#include <future>
19#include <random>
20
21namespace TMVA
22{
23 namespace DNN
24 {
25
26
27
28
29
30
31
32
/*! \brief draw a pseudo-random value between "from" and "to"
 *
 * Uses the C library generator rand (); the draw is scaled by (to - from)/RAND_MAX and shifted by "from".
 */
template <typename T>
T uniformFromTo (T from, T to)
{
    const T span = to - from;
    const auto offset = rand () * span / RAND_MAX;
    return from + offset;
}
38
39
40
41 template <typename Container, typename T>
42 void uniformDouble (Container& container, T maxValue)
43 {
44 for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
45 {
46// (*it) = uniformFromTo (-1.0*maxValue, 1.0*maxValue);
47 (*it) = TMVA::DNN::uniformFromTo (-1.0*maxValue, 1.0*maxValue);
48 }
49 }
50
51
// Declarations of the activation-function objects; the definitions are not part of this file.
// Every object is a std::shared_ptr to a std::function<double(double)>.
// Each function "X" (except ZeroFnc) is paired with an "InvX" object. In applyFunctions further down,
// the second function object is evaluated on the already-activated node value and the result is stored
// as the gradient (presumably the derivative of X expressed through its output -- confirm at the definitions).
52 extern std::shared_ptr<std::function<double(double)>> ZeroFnc;
53
54
55 extern std::shared_ptr<std::function<double(double)>> Sigmoid;
56 extern std::shared_ptr<std::function<double(double)>> InvSigmoid;
57
58 extern std::shared_ptr<std::function<double(double)>> Tanh;
59 extern std::shared_ptr<std::function<double(double)>> InvTanh;
60
61 extern std::shared_ptr<std::function<double(double)>> Linear;
62 extern std::shared_ptr<std::function<double(double)>> InvLinear;
63
64 extern std::shared_ptr<std::function<double(double)>> SymmReLU;
65 extern std::shared_ptr<std::function<double(double)>> InvSymmReLU;
66
67 extern std::shared_ptr<std::function<double(double)>> ReLU;
68 extern std::shared_ptr<std::function<double(double)>> InvReLU;
69
70 extern std::shared_ptr<std::function<double(double)>> SoftPlus;
71 extern std::shared_ptr<std::function<double(double)>> InvSoftPlus;
72
73 extern std::shared_ptr<std::function<double(double)>> TanhShift;
74 extern std::shared_ptr<std::function<double(double)>> InvTanhShift;
75
76 extern std::shared_ptr<std::function<double(double)>> SoftSign;
77 extern std::shared_ptr<std::function<double(double)>> InvSoftSign;
78
79 extern std::shared_ptr<std::function<double(double)>> Gauss;
80 extern std::shared_ptr<std::function<double(double)>> InvGauss;
81
82 extern std::shared_ptr<std::function<double(double)>> GaussComplement;
83 extern std::shared_ptr<std::function<double(double)>> InvGaussComplement;
84
85
/*! \brief accumulate the weighted source values into the target nodes, optionally with drop-out
 *
 * The weights are consumed in source-major order: for each source node, one weight per target node.
 * itDrop advances together with the source nodes and is only read when HasDropOut is true; a source
 * node whose flag is false contributes nothing (its weights are still skipped over).
 * Without drop-out, pass a pointer to a bool that is "true" as itDrop.
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    ItSource itSrc = itSourceBegin;
    while (itSrc != itSourceEnd)
    {
        ItTarget itTgt = itTargetBegin;
        while (itTgt != itTargetEnd)
        {
            const bool sourceIsActive = !HasDropOut || *itDrop;
            if (sourceIsActive)
                (*itTgt) += (*itSrc) * (*itWeight);
            ++itWeight;
            ++itTgt;
        }
        if (HasDropOut)
            ++itDrop;
        ++itSrc;
    }
}
107
108
109
110
111
112
/*! \brief propagate values backwards through the weights (used for back-propagation)
 *
 * For each node of the previous layer, the weighted values of all current-layer nodes are added to it.
 * The weights are consumed in the same order as in applyWeights (one run of "current" weights per
 * previous node). itDrop advances with the previous-layer nodes and is only read when HasDropOut is
 * true; without drop-out, pass a pointer to a bool that is "true".
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    ItPrev itPrevNode = itPrevBegin;
    while (itPrevNode != itPrevEnd)
    {
        ItSource itCurrNode = itCurrBegin;
        while (itCurrNode != itCurrEnd)
        {
            const bool prevIsActive = !HasDropOut || *itDrop;
            if (prevIsActive)
                (*itPrevNode) += (*itCurrNode) * (*itWeight);
            ++itWeight;
            ++itCurrNode;
        }
        if (HasDropOut)
            ++itDrop;
        ++itPrevNode;
    }
}
134
135
136
137
138
139
140
/*! \brief apply the activation function in place
 *
 * Every value in [itValue, itValueEnd) is replaced by fnc applied to it.
 * fnc is a pointer-like object (it must offer get ()) to a callable.
 */
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    for (; itValue != itValueEnd; ++itValue)
        (*itValue) = (*fnc.get ()) (*itValue);
}
157
158
/*! \brief apply the activation function in place and compute the gradient
 *
 * Each value is first replaced by fnc (value); invFnc is then evaluated on that *new* value
 * and the result is written to the gradient range, which advances in step with the values.
 * fnc and invFnc are pointer-like objects (they must offer get ()) to callables.
 */
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    for (; itValue != itValueEnd; ++itValue, ++itGradient)
    {
        (*itValue) = (*fnc.get ()) (*itValue);
        // the gradient is derived from the activated value that was just stored
        (*itGradient) = (*invFnc.get ()) (*itValue);
    }
}
175
176
177
/*! \brief update the weight gradients
 *
 * For every (source node, target node) pair -- source-major -- the product
 * delta(target) * value(source) * valueGradient(target) is subtracted from the next gradient entry.
 */
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    for (; itSource != itSourceEnd; ++itSource)
    {
        auto itNodeGradient = itTargetGradientBegin;
        for (auto itDelta = itTargetDeltaBegin; itDelta != itTargetDeltaEnd; ++itDelta, ++itNodeGradient, ++itGradient)
            (*itGradient) -= (*itDelta) * (*itSource) * (*itNodeGradient);
    }
}
200
201
202
203
204/*! \brief compute the regularization (L1, L2)
205 *
206 *
207 */
208 template <EnumRegularization Regularization>
209 inline double computeRegularization (double weight, const double& factorWeightDecay)
210 {
211 MATH_UNUSED(weight);
212 MATH_UNUSED(factorWeightDecay);
213
214 return 0;
215 }
216
217// L1 regularization
218 template <>
219 inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
220 {
221 return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
222 }
223
224// L2 regularization
225 template <>
226 inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
227 {
228 return factorWeightDecay * weight;
229 }
230
231
232/*! \brief update the gradients, using regularization
233 *
234 *
235 */
236 template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
237 void update (ItSource itSource, ItSource itSourceEnd,
238 ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
239 ItTargetGradient itTargetGradientBegin,
240 ItGradient itGradient,
241 ItWeight itWeight, double weightDecay)
242 {
243 // ! the factor weightDecay has to be already scaled by 1/n where n is the number of weights
244 while (itSource != itSourceEnd)
245 {
246 auto itTargetDelta = itTargetDeltaBegin;
247 auto itTargetGradient = itTargetGradientBegin;
248 while (itTargetDelta != itTargetDeltaEnd)
249 {
250 (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight,weightDecay);
251 ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
252 }
253 ++itSource;
254 }
255 }
256
257
258
259
260
261
262#define USELOCALWEIGHTS 1
263
264
265
266/*! \brief implementation of the steepest gradient descent algorithm
267 *
268 * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle
269 */
270 template <typename Function, typename Weights, typename PassThrough>
271 double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
272 {
273 size_t numWeights = weights.size ();
274 // std::vector<double> gradients (numWeights, 0.0);
275 m_localGradients.assign (numWeights, 0.0);
276 // std::vector<double> localWeights (begin (weights), end (weights));
277 // m_localWeights.reserve (numWeights);
278 m_localWeights.assign (begin (weights), end (weights));
279
280 double E = 1e10;
281 if (m_prevGradients.size () != numWeights)
282 {
283 m_prevGradients.clear ();
284 m_prevGradients.assign (weights.size (), 0);
285 }
286
287 bool success = true;
288 size_t currentRepetition = 0;
289 while (success)
290 {
291 if (currentRepetition >= m_repetitions)
292 break;
293
294 m_localGradients.assign (numWeights, 0.0);
295
296 // --- nesterov momentum ---
297 // apply momentum before computing the new gradient
298 auto itPrevG = begin (m_prevGradients);
299 auto itPrevGEnd = end (m_prevGradients);
300 auto itLocWeight = begin (m_localWeights);
301 for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
302 {
303 (*itPrevG) *= m_beta;
304 (*itLocWeight) += (*itPrevG);
305 }
306
307 E = fitnessFunction (passThrough, m_localWeights, m_localGradients);
308// plotGradients (gradients);
309// plotWeights (localWeights);
310
311 double alpha = gaussDouble (m_alpha, m_alpha/2.0);
312// double alpha = m_alpha;
313
314 auto itG = begin (m_localGradients);
315 auto itGEnd = end (m_localGradients);
316 itPrevG = begin (m_prevGradients);
317 double maxGrad = 0.0;
318 for (; itG != itGEnd; ++itG, ++itPrevG)
319 {
320 double currGrad = (*itG);
321 double prevGrad = (*itPrevG);
322 currGrad *= alpha;
323
324 //(*itPrevG) = m_beta * (prevGrad + currGrad);
325 currGrad += prevGrad;
326 (*itG) = currGrad;
327 (*itPrevG) = currGrad;
328
329 if (std::fabs (currGrad) > maxGrad)
330 maxGrad = currGrad;
331 }
332
333 if (maxGrad > 1)
334 {
335 m_alpha /= 2;
336 std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
337 std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
338 {
339 w /= maxGrad;
340 });
341 m_prevGradients.clear ();
342 }
343 else
344 {
345 auto itW = std::begin (weights);
346 std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
347 {
348 *itW += g;
349 ++itW;
350 });
351 }
352
353 ++currentRepetition;
354 }
355 return E;
356 }
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
/*! \brief sum of squares error function
 *
 * Returns 0.5 * sum over all outputs of patternWeight * (output - truth)^2.
 * If the delta range is not empty, invFnc (output) * (output - truth) * patternWeight is written
 * to it, one entry per output. The truth range is walked in step with the outputs; its end
 * iterator is not checked.
 */
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
{
    const bool writeDeltas = (itDelta != itDeltaEnd);
    double weightedSquares = 0.0;

    ItTruth itTarget = itTruthBegin;
    ItOutput itOut = itOutputBegin;
    while (itOut != itOutputEnd)
    {
        const double predicted = (*itOut);
        const double residual = predicted - (*itTarget);   // output - truth
        if (writeDeltas)
        {
            (*itDelta) = (*invFnc.get ()) (predicted) * residual * patternWeight;
            ++itDelta;
        }
        weightedSquares += residual*residual * patternWeight;

        ++itOut;
        ++itTarget;
    }

    return 0.5*weightedSquares;
}
404
405
406
/*! \brief cross entropy error function
 *
 * The truth value is always read from itTruthBegin (the truth iterator is never advanced) and is
 * snapped to 0.1 (truth < 0.5) or 0.9 (otherwise) before use.
 * If the delta range is not empty, (probability - truth) * patternWeight is written to it.
 * Probabilities of exactly 0 or 1 are handled separately to avoid log (0): they cost 1 when they
 * contradict the truth and 0 otherwise.
 *
 * \return the sum of the per-output errors, each multiplied by patternWeight
 */
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    const bool writeDeltas = (itDelta != itDeltaEnd);

    double total = 0.0;
    for (ItProbability itP = itProbabilityBegin; itP != itProbabilityEnd; ++itP)
    {
        const double probability = *itP;
        const double rawTruth = *itTruthBegin;
        const double truth = rawTruth < 0.5 ? 0.1 : 0.9;

        if (writeDeltas)
        {
            (*itDelta) = (probability - truth)*patternWeight;
            ++itDelta;
        }

        double error = 0.0;
        if (probability == 0) // protection against log (0)
        {
            if (truth >= 0.5)
                error = 1.0;
        }
        else if (probability == 1) // protection against log (1 - 1)
        {
            if (truth < 0.5)
                error = 1.0;
        }
        else
        {
            // cross entropy function
            error = - (truth * log (probability) + (1.0-truth) * log (1.0-probability));
        }
        total += error * patternWeight;
    }
    return total;
}
449
450
451
452
/*! \brief soft-max-cross-entropy error function (for mutual exclusive cross-entropy)
 *
 * Returns -patternWeight * sum over all outputs of truth * log (probability).
 * Terms with a truth value of exactly 0 are skipped: they contribute nothing, and evaluating
 * 0 * log (0) for an underflowed probability would turn the whole error into NaN.
 * If the delta range is not empty, (probability - truth) is written to it, one entry per output.
 * The truth range is walked in step with the probabilities; its end iterator is not checked.
 *
 * NOTE(review): unlike crossEntropy, the deltas written here are not multiplied by patternWeight --
 * confirm whether that is intended.
 */
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    double errorSum = 0.0;

    const bool hasDeltas = (itDelta != itDeltaEnd);
    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        const double probability = (*itProbability);
        const double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;   // output - truth
            ++itDelta;
        }

        // 0 * log (0) = 0 * (-inf) = NaN; by convention such a term is zero
        if (truth != 0.0)
            errorSum += truth * log (probability);
    }

    return -errorSum * patternWeight;
}
484
485
486
487
488
489
490
491
492
493/*! \brief compute the weight decay for regularization (L1 or L2)
494 *
495 *
496 */
497 template <typename ItWeight>
498 double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
499 {
500 if (eRegularization == EnumRegularization::L1)
501 {
502 // weight decay (regularization)
503 double w = 0;
504 size_t n = 0;
505 for (; itWeight != itWeightEnd; ++itWeight, ++n)
506 {
507 double weight = (*itWeight);
508 w += std::fabs (weight);
509 }
510 return error + 0.5 * w * factorWeightDecay / n;
511 }
512 else if (eRegularization == EnumRegularization::L2)
513 {
514 // weight decay (regularization)
515 double w = 0;
516 size_t n = 0;
517 for (; itWeight != itWeightEnd; ++itWeight, ++n)
518 {
519 double weight = (*itWeight);
520 w += weight*weight;
521 }
522 return error + 0.5 * w * factorWeightDecay / n;
523 }
524 else
525 return error;
526 }
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541/*! \brief apply the weights (and functions) in forward direction of the DNN
542 *
543 *
544 */
545 template <typename LAYERDATA>
546 void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
547 {
548 if (prevLayerData.hasDropOut ())
549 {
550 applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
551 currLayerData.weightsBegin (),
552 currLayerData.valuesBegin (), currLayerData.valuesEnd (),
553 prevLayerData.dropOut ());
554 }
555 else
556 {
557 bool dummy = true;
558 applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
559 currLayerData.weightsBegin (),
560 currLayerData.valuesBegin (), currLayerData.valuesEnd (),
561 &dummy); // dummy to turn on all nodes (no drop out)
562 }
563 }
564
565
566
567/*! \brief backward application of the weights (back-propagation of the error)
568 *
569 *
570 */
571template <typename LAYERDATA>
572 void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
573{
574 if (prevLayerData.hasDropOut ())
575 {
576 applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
577 currLayerData.weightsBegin (),
578 prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
579 prevLayerData.dropOut ());
580 }
581 else
582 {
583 bool dummy = true;
584 applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
585 currLayerData.weightsBegin (),
586 prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
587 &dummy); // dummy to use all nodes (no drop out)
588 }
589}
590
591
592
593
594
595/*! \brief update the node values
596 *
597 *
598 */
599 template <typename LAYERDATA>
600 void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
601 {
602 // ! the "factorWeightDecay" has already to be scaled by 1/n where n is the number of weights
603 if (factorWeightDecay != 0.0) // has weight regularization
604 if (regularization == EnumRegularization::L1) // L1 regularization ( sum(|w|) )
605 {
606 update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
607 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
608 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
609 currLayerData.weightsBegin (), factorWeightDecay);
610 }
611 else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) )
612 {
613 update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
614 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
615 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
616 currLayerData.weightsBegin (), factorWeightDecay);
617 }
618 else
619 {
620 update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
621 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
622 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
623 }
624
625 else
626 { // no weight regularization
627 update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
628 currLayerData.deltasBegin (), currLayerData.deltasEnd (),
629 currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
630 }
631 }
632
633
634
635
636
637
638
639
640
641
642
643
644/*! \brief compute the drop-out-weight factor
645 *
646 * when using drop-out a fraction of the nodes is turned off at each cycle of the computation
647 * once all nodes are turned on again (for instances when the test samples are evaluated),
648 * the weights have to be adjusted to account for the different number of active nodes
649 * this function computes the factor and applies it to the weights
650 */
651 template <typename WeightsType, typename DropProbabilities>
652 void Net::dropOutWeightFactor (WeightsType& weights,
653 const DropProbabilities& drops,
654 bool inverse)
655 {
656 if (drops.empty () || weights.empty ())
657 return;
658
659 auto itWeight = std::begin (weights);
660 auto itWeightEnd = std::end (weights);
661 auto itDrop = std::begin (drops);
662 auto itDropEnd = std::end (drops);
663 size_t numNodesPrev = inputSize ();
664 double dropFractionPrev = *itDrop;
665 ++itDrop;
666
667 for (auto& layer : layers ())
668 {
669 if (itDrop == itDropEnd)
670 break;
671
672 size_t _numNodes = layer.numNodes ();
673
674 double dropFraction = *itDrop;
675 double pPrev = 1.0 - dropFractionPrev;
676 double p = 1.0 - dropFraction;
677 p *= pPrev;
678
679 if (inverse)
680 {
681 p = 1.0/p;
682 }
683 size_t _numWeights = layer.numWeights (numNodesPrev);
684 for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
685 {
686 if (itWeight == itWeightEnd)
687 break;
688
689 *itWeight *= p;
690 ++itWeight;
691 }
692 numNodesPrev = _numNodes;
693 dropFractionPrev = dropFraction;
694 ++itDrop;
695 }
696 }
697
698
699
700
701
702
703/*! \brief execute the training until convergence emerges
704 *
705 * \param weights the container with the weights (synapses)
706 * \param trainPattern the pattern for the training
707 * \param testPattern the pattern for the testing
708 * \param minimizer the minimizer (e.g. steepest gradient descent) to be used
709 * \param settings the settings for the training (e.g. multithreading or not, regularization etc.)
710 */
711 template <typename Minimizer>
712 double Net::train (std::vector<double>& weights,
713 std::vector<Pattern>& trainPattern,
714 const std::vector<Pattern>& testPattern,
715 Minimizer& minimizer,
716 Settings& settings)
717 {
718// std::cout << "START TRAINING" << std::endl;
719 settings.startTrainCycle ();
720
721 // JsMVA progress bar maximum (100%)
722 if (fIPyMaxIter) *fIPyMaxIter = 100;
723
724 settings.pads (4);
725 settings.create ("trainErrors", 100, 0, 100, 100, 0,1);
726 settings.create ("testErrors", 100, 0, 100, 100, 0,1);
727
728 size_t cycleCount = 0;
729 size_t testCycleCount = 0;
730 double testError = 1e20;
731 double trainError = 1e20;
732 size_t dropOutChangeCount = 0;
733
734 DropContainer dropContainer;
735 DropContainer dropContainerTest;
736 const std::vector<double>& dropFractions = settings.dropFractions ();
737 bool isWeightsForDrop = false;
738
739
740 // until convergence
741 do
742 {
743 ++cycleCount;
744
745 // if dropOut enabled
746 size_t dropIndex = 0;
747 if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
748 {
749 // fill the dropOut-container
750 dropContainer.clear ();
751 size_t _numNodes = inputSize ();
752 double dropFraction = 0.0;
753 dropFraction = dropFractions.at (dropIndex);
754 ++dropIndex;
755 fillDropContainer (dropContainer, dropFraction, _numNodes);
756 for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
757 {
758 auto& layer = *itLayer;
759 _numNodes = layer.numNodes ();
760 // how many nodes have to be dropped
761 dropFraction = 0.0;
762 if (dropFractions.size () > dropIndex)
763 dropFraction = dropFractions.at (dropIndex);
764
765 fillDropContainer (dropContainer, dropFraction, _numNodes);
766 }
767 isWeightsForDrop = true;
768 }
769
770 // execute training cycle
771 trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);
772
773
774 // ------ check if we have to execute a test ------------------
775 bool hasConverged = false;
776 if (testCycleCount % settings.testRepetitions () == 0) // we test only everye "testRepetitions" repetition
777 {
778 if (isWeightsForDrop)
779 {
780 dropOutWeightFactor (weights, dropFractions);
781 isWeightsForDrop = false;
782 }
783
784
785 testError = 0;
786 //double weightSum = 0;
787 settings.startTestCycle ();
788 if (settings.useMultithreading ())
789 {
790 size_t numThreads = std::thread::hardware_concurrency ();
791 size_t patternPerThread = testPattern.size () / numThreads;
792 std::vector<Batch> batches;
793 auto itPat = testPattern.begin ();
794 // auto itPatEnd = testPattern.end ();
795 for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
796 {
797 batches.push_back (Batch (itPat, itPat + patternPerThread));
798 itPat += patternPerThread;
799 }
800 if (itPat != testPattern.end ())
801 batches.push_back (Batch (itPat, testPattern.end ()));
802
803 std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
804 for (auto& batch : batches)
805 {
806 // -------------------- execute each of the batch ranges on a different thread -------------------------------
807 futures.push_back (
808 std::async (std::launch::async, [&]()
809 {
810 std::vector<double> localOutput;
811 pass_through_type passThrough (settings, batch, dropContainerTest);
812 double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
813 return std::make_tuple (testBatchError, localOutput);
814 })
815 );
816 }
817
818 auto itBatch = batches.begin ();
819 for (auto& f : futures)
820 {
821 std::tuple<double,std::vector<double>> result = f.get ();
822 testError += std::get<0>(result) / batches.size ();
823 std::vector<double> output = std::get<1>(result);
824 if (output.size() == (outputSize() - 1) * itBatch->size())
825 {
826 auto output_iterator = output.begin();
827 for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
828 {
829 for (size_t output_index = 1; output_index < outputSize(); ++output_index)
830 {
831 settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
832 (*pattern_it).weight ());
833 ++output_iterator;
834 }
835 }
836 }
837 ++itBatch;
838 }
839
840 }
841 else
842 {
843 std::vector<double> output;
844 //for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it)
845 {
846 //const Pattern& p = (*it);
847 //double weight = p.weight ();
848 //Batch batch (it, it+1);
849 Batch batch (begin (testPattern), end (testPattern));
850 output.clear ();
851 pass_through_type passThrough (settings, batch, dropContainerTest);
852 double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output);
853 if (output.size() == (outputSize() - 1) * batch.size())
854 {
855 auto output_iterator = output.begin();
856 for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
857 {
858 for (size_t output_index = 1; output_index < outputSize(); ++output_index)
859 {
860 settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
861 (*pattern_it).weight ());
862 ++output_iterator;
863 }
864 }
865 }
866 testError += testPatternError; /// batch.size ();
867 }
868 // testError /= testPattern.size ();
869 }
870 settings.endTestCycle ();
871// testError /= weightSum;
872
873 settings.computeResult (*this, weights);
874
875 hasConverged = settings.hasConverged (testError);
876 if (!hasConverged && !isWeightsForDrop)
877 {
878 dropOutWeightFactor (weights, dropFractions, true); // inverse
879 isWeightsForDrop = true;
880 }
881 }
882 ++testCycleCount;
883 ++dropOutChangeCount;
884
885
886// settings.resetPlot ("errors");
887 settings.addPoint ("trainErrors", cycleCount, trainError);
888 settings.addPoint ("testErrors", cycleCount, testError);
889 settings.plot ("trainErrors", "C", 1, kBlue);
890 settings.plot ("testErrors", "C", 1, kMagenta);
891
892
893 // setup error plots and progress bar variables for JsMVA
894 if (fInteractive){
895 fInteractive->AddPoint(cycleCount, trainError, testError);
896 if (*fExitFromTraining) break;
897 *fIPyCurrentIter = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
898 }
899
900 if (hasConverged)
901 break;
902
903 if ((int)cycleCount % 10 == 0) {
904
905 TString convText = Form( "(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
906 trainError,
907 testError,
908 (int)cycleCount,
909 (int)settings.convergenceCount (),
910 (int)settings.maxConvergenceCount ());
911 double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
912 settings.cycle (progress, convText);
913 }
914 }
915 while (true);
916 settings.endTrainCycle (trainError);
917
918 TString convText = Form( "(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
919 double progress = 100*(double)settings.maxConvergenceCount() /(double)settings.convergenceSteps ();
920 settings.cycle (progress, convText);
921
922 return testError;
923 }
924
925
926
927/*! \brief execute a single training cycle
928 *
929 * uses multithreading if turned on
930 *
931 * \param minimizer the minimizer to be used (e.g. SGD)
932 * \param weights the weight container with all the synapse weights
933 * \param itPatternBegin begin of the pattern container
934 * \param itPatternEnd the end of the pattern container
935 * \param settings the settings for this training (e.g. multithreading or not, regularization, etc.)
936 * \param dropContainer the data for dropping-out nodes (regularization technique)
937 */
938 template <typename Iterator, typename Minimizer>
939 inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
940 Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)
941 {
942 double error = 0.0;
943 size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
944 size_t numBatches = numPattern/settings.batchSize ();
945 size_t numBatches_stored = numBatches;
946
947 std::shuffle(itPatternBegin, itPatternEnd, std::default_random_engine{});
948 Iterator itPatternBatchBegin = itPatternBegin;
949 Iterator itPatternBatchEnd = itPatternBatchBegin;
950
951 // create batches
952 std::vector<Batch> batches;
953 while (numBatches > 0)
954 {
955 std::advance (itPatternBatchEnd, settings.batchSize ());
956 batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
957 itPatternBatchBegin = itPatternBatchEnd;
958 --numBatches;
959 }
960
961 // add the last pattern to the last batch
962 if (itPatternBatchEnd != itPatternEnd)
963 batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));
964
965
966 ///< turn on multithreading if requested
967 if (settings.useMultithreading ())
968 {
969 // -------------------- divide the batches into bunches for each thread --------------
970 size_t numThreads = std::thread::hardware_concurrency ();
971 size_t batchesPerThread = batches.size () / numThreads;
972 typedef std::vector<Batch>::iterator batch_iterator;
973 std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
974 batch_iterator itBatchBegin = std::begin (batches);
975 batch_iterator itBatchCurrEnd = std::begin (batches);
976 batch_iterator itBatchEnd = std::end (batches);
977 for (size_t iT = 0; iT < numThreads; ++iT)
978 {
979 if (iT == numThreads-1)
980 itBatchCurrEnd = itBatchEnd;
981 else
982 std::advance (itBatchCurrEnd, batchesPerThread);
983 batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
984 itBatchBegin = itBatchCurrEnd;
985 }
986
987 // -------------------- loop over batches -------------------------------------------
988 std::vector<std::future<double>> futures;
989 for (auto& batchRange : batchVec)
990 {
991 // -------------------- execute each of the batch ranges on a different thread -------------------------------
992 futures.push_back (
993 std::async (std::launch::async, [&]()
994 {
995 double localError = 0.0;
996 for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
997 {
998 Batch& batch = *it;
999 pass_through_type settingsAndBatch (settings, batch, dropContainer);
1000 Minimizer minimizerClone (minimizer);
1001 localError += minimizerClone ((*this), weights, settingsAndBatch); /// call the minimizer
1002 }
1003 return localError;
1004 })
1005 );
1006 }
1007
1008 for (auto& f : futures)
1009 error += f.get ();
1010 }
1011 else
1012 {
1013 for (auto& batch : batches)
1014 {
1015 std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
1016 error += minimizer ((*this), weights, settingsAndBatch);
1017 }
1018 }
1019
1020 numBatches_stored = std::max (numBatches_stored, size_t(1)); /// normalize the error
1021 error /= numBatches_stored;
1022 settings.testIteration ();
1023
1024 return error;
1025 }
1026
1027
1028
1029
1030
1031/*! \brief compute the neural net
1032 *
1033 * \param input the input data
1034 * \param weights the weight data
1035 */
1036 template <typename Weights>
1037 std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
1038 {
1039 std::vector<LayerData> layerData;
1040 layerData.reserve (m_layers.size ()+1);
1041 auto itWeight = begin (weights);
1042 auto itInputBegin = begin (input);
1043 auto itInputEnd = end (input);
1044 layerData.push_back (LayerData (itInputBegin, itInputEnd));
1045 size_t numNodesPrev = input.size ();
1046
1047 // -------------------- prepare layer data with one pattern -------------------------------
1048 for (auto& layer: m_layers)
1049 {
1050 layerData.push_back (LayerData (layer.numNodes (), itWeight,
1051 layer.activationFunction (),
1052 layer.modeOutputValues ()));
1053 size_t _numWeights = layer.numWeights (numNodesPrev);
1054 itWeight += _numWeights;
1055 numNodesPrev = layer.numNodes ();
1056 }
1057
1058
1059 // --------- forward -------------
1060 forwardPattern (m_layers, layerData);
1061
1062 // ------------- fetch output ------------------
1063 std::vector<double> output;
1064 fetchOutput (layerData.back (), output);
1065 return output;
1066 }
1067
1068
1069 template <typename Weights, typename PassThrough>
1070 double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
1071 {
1072 std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1073 assert (numWeights () == weights.size ());
1074 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false);
1075 return error;
1076 }
1077
1078 template <typename Weights, typename PassThrough, typename OutContainer>
1079 double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
1080 {
1081 std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1082 assert (numWeights () == weights.size ());
1083 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
1084 return error;
1085 }
1086
1087
1088 template <typename Weights, typename Gradients, typename PassThrough>
1089 double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
1090 {
1091 std::vector<double> nothing;
1092 assert (numWeights () == weights.size ());
1093 assert (weights.size () == gradients.size ());
1094 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
1095 return error;
1096 }
1097
1098 template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1099 double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
1100 {
1101 MATH_UNUSED(eFetch);
1102 assert (numWeights () == weights.size ());
1103 assert (weights.size () == gradients.size ());
1104 double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
1105 return error;
1106 }
1107
1108
1109
1110 template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1111 std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
1112 Batch& batch,
1113 const DropContainer& dropContainer,
1114 ItWeight itWeightBegin,
1115 ItWeight /*itWeightEnd*/,
1116 ItGradient itGradientBegin,
1117 ItGradient itGradientEnd,
1118 size_t& totalNumWeights) const
1119 {
1121 bool usesDropOut = !dropContainer.empty ();
1122 if (usesDropOut)
1123 itDropOut = std::begin (dropContainer);
1124
1125 if (_layers.empty ())
1126 throw std::string ("no layers in this net");
1127
1128
1129 // ----------- create layer data -------------------------------------------------------
1130 //LM- This assert not needed anymore (outputsize is actually numNodes+1)
1131 //assert (_layers.back ().numNodes () == outputSize ());
1132 totalNumWeights = 0;
1133 std::vector<std::vector<LayerData>> layerPatternData;
1134 layerPatternData.reserve (_layers.size ()+1);
1135 ItWeight itWeight = itWeightBegin;
1136 ItGradient itGradient = itGradientBegin;
1137 size_t numNodesPrev = inputSize ();
1138 typename Pattern::const_iterator itInputBegin;
1139 typename Pattern::const_iterator itInputEnd;
1140
1141 // ItWeight itGammaBegin = itWeightBegin + numWeights ();
1142 // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes ();
1143 // ItGradient itGradGammaBegin = itGradientBegin + numWeights ();
1144 // ItGradient itGradBetaBegin = itGradientBegin + numWeights () + numNodes ();
1145
1146
1147 // --------------------- prepare layer data for input layer ----------------------------
1148 layerPatternData.push_back (std::vector<LayerData>());
1149 for (const Pattern& _pattern : batch)
1150 {
1151 std::vector<LayerData>& layerData = layerPatternData.back ();
1152 layerData.push_back (LayerData (numNodesPrev));
1153
1154 itInputBegin = _pattern.beginInput ();
1155 itInputEnd = _pattern.endInput ();
1156 layerData.back ().setInput (itInputBegin, itInputEnd);
1157
1158 if (usesDropOut)
1159 layerData.back ().setDropOut (itDropOut);
1160
1161 }
1162
1163
1164 if (usesDropOut)
1165 itDropOut += _layers.back ().numNodes ();
1166
1167 // ---------------- prepare subsequent layers ---------------------------------------------
1168 // for each of the layers
1169 for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
1170 {
1171 bool isOutputLayer = (itLayer+1 == itLayerEnd);
1172 bool isFirstHiddenLayer = (itLayer == begin (_layers));
1173
1174 auto& layer = *itLayer;
1175 layerPatternData.push_back (std::vector<LayerData>());
1176 // for each pattern, prepare a layerData
1177 for (const Pattern& _pattern : batch)
1178 {
1179 std::vector<LayerData>& layerData = layerPatternData.back ();
1180 //layerData.push_back (LayerData (numNodesPrev));
1181
1182 if (itGradientBegin == itGradientEnd)
1183 {
1184 layerData.push_back (LayerData (layer.numNodes (), itWeight,
1185 layer.activationFunction (),
1186 layer.modeOutputValues ()));
1187 }
1188 else
1189 {
1190 layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
1191 layer.activationFunction (),
1192 layer.inverseActivationFunction (),
1193 layer.modeOutputValues ()));
1194 }
1195
1196 if (usesDropOut)
1197 {
1198 layerData.back ().setDropOut (itDropOut);
1199 }
1200
1201 }
1202
1203 if (usesDropOut)
1204 {
1205 itDropOut += layer.numNodes ();
1206 }
1207 size_t _numWeights = layer.numWeights (numNodesPrev);
1208 totalNumWeights += _numWeights;
1209 itWeight += _numWeights;
1210 itGradient += _numWeights;
1211 numNodesPrev = layer.numNodes ();
1212
1213 }
1214 assert (totalNumWeights > 0);
1215 return layerPatternData;
1216}
1217
1218
1219
1220 template <typename LayerContainer>
1221 void Net::forwardPattern (const LayerContainer& _layers,
1222 std::vector<LayerData>& layerData) const
1223 {
1224 size_t idxLayer = 0, idxLayerEnd = _layers.size ();
1225 for (; idxLayer < idxLayerEnd; ++idxLayer)
1226 {
1227 LayerData& prevLayerData = layerData.at (idxLayer);
1228 LayerData& currLayerData = layerData.at (idxLayer+1);
1229
1230 forward (prevLayerData, currLayerData);
1231
1232 applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1233 }
1234 }
1235
1236
1237
1238
1239 template <typename LayerContainer, typename LayerPatternContainer>
1240 void Net::forwardBatch (const LayerContainer& _layers,
1241 LayerPatternContainer& layerPatternData,
1242 std::vector<double>& valuesMean,
1243 std::vector<double>& valuesStdDev,
1244 size_t trainFromLayer) const
1245 {
1246 valuesMean.clear ();
1247 valuesStdDev.clear ();
1248
1249 // ---------------------------------- loop over layers and pattern -------------------------------------------------------
1250 for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
1251 {
1252 bool doTraining = idxLayer >= trainFromLayer;
1253
1254 // get layer-pattern data for this and the corresponding one from the next layer
1255 std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
1256 std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);
1257
1258 size_t numPattern = prevLayerPatternData.size ();
1259 size_t numNodesLayer = _layers.at (idxLayer).numNodes ();
1260
1261 std::vector<MeanVariance> means (numNodesLayer);
1262 // ---------------- loop over layerDatas of pattern compute forward ----------------------------
1263 for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1264 {
1265 const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1266 LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1267
1268
1269 forward (prevLayerData, currLayerData); // feed forward
1270 }
1271
1272 // ---------------- loop over layerDatas of pattern apply non-linearities ----------------------------
1273 for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1274 {
1275 //const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1276 LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1277
1278 if (doTraining)
1279 applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
1280 currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
1281 else
1282 applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1283 }
1284 }
1285}
1286
1287
1288
1289
1290 template <typename OutputContainer>
1291 void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
1292 {
1293 ModeOutputValues eModeOutput = lastLayerData.outputMode ();
1294 if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
1295 {
1296 outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
1297 }
1298 else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) ||
1299 isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput))
1300 {
1301 const auto& prob = lastLayerData.probabilities ();
1302 outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ()) ;
1303 }
1304 else
1305 assert (false);
1306 }
1307
1308
1309
1310
1311 template <typename OutputContainer>
1312 void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
1313 {
1314 for (const LayerData& lastLayerData : lastLayerPatternData)
1315 fetchOutput (lastLayerData, outputContainer);
1316 }
1317
1318
1319
1320 template <typename ItWeight>
1321 std::tuple</*sumError*/double,/*sumWeights*/double> Net::computeError (const Settings& settings,
1322 std::vector<LayerData>& lastLayerData,
1323 Batch& batch,
1324 ItWeight itWeightBegin,
1325 ItWeight itWeightEnd) const
1326 {
1327 typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();
1328// typename std::vector<LayerData>::iterator itLayerDataEnd = lastLayerData.end ();
1329
1330 typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
1331 typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();
1332
1333 double sumWeights (0.0);
1334 double sumError (0.0);
1335
1336 size_t idxPattern = 0;
1337// FIXME: check that iteration doesn't go beyond itLayerDataEnd!
1338 for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
1339 {
1340 ++idxPattern;
1341
1342 // compute E and the deltas of the computed output and the true output
1343 LayerData& layerData = (*itLayerData);
1344 const Pattern& _pattern = (*itPattern);
1345 double error = errorFunction (layerData, _pattern.output (),
1346 itWeightBegin, itWeightEnd,
1347 _pattern.weight (), settings.factorWeightDecay (),
1348 settings.regularization ());
1349 sumWeights += fabs (_pattern.weight ());
1350 sumError += error;
1351 }
1352 return std::make_tuple (sumError, sumWeights);
1353 }
1354
1355
1356
1357 template <typename Settings>
1358 void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
1359 const Settings& settings,
1360 size_t trainFromLayer,
1361 size_t totalNumWeights) const
1362 {
1363 bool doTraining = layerPatternData.size () > trainFromLayer;
1364 if (doTraining) // training
1365 {
1366 // ------------- backpropagation -------------
1367 size_t idxLayer = layerPatternData.size ();
1368 for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
1369 itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
1370 {
1371 --idxLayer;
1372 if (idxLayer <= trainFromLayer) // no training
1373 break;
1374
1375 std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
1376 std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);
1377
1378 size_t idxPattern = 0;
1379// FIXME: check that itPrevLayerData doesn't go beyond itPrevLayerDataEnd!
1380 for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
1381 itPrevLayerData = begin (prevLayerDataColl) /*, itPrevLayerDataEnd = end (prevLayerDataColl)*/;
1382 itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
1383 {
1384 LayerData& currLayerData = (*itCurrLayerData);
1385 LayerData& prevLayerData = *(itPrevLayerData);
1386
1387 backward (prevLayerData, currLayerData);
1388
1389 // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses)
1390 // because L1 and L2 regularization
1391 //
1392 // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization
1393 //
1394 // L1 : -factorWeightDecay*sgn(w)/numWeights
1395 // L2 : -factorWeightDecay/numWeights
1396 update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
1397 }
1398 }
1399 }
1400 }
1401
1402
1403
1404/*! \brief forward propagation and backward propagation
1405 *
1406 *
1407 */
1408 template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
1409 double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
1410 ItWeight itWeightBegin, ItWeight itWeightEnd,
1411 ItGradient itGradientBegin, ItGradient itGradientEnd,
1412 size_t trainFromLayer,
1413 OutContainer& outputContainer, bool doFetchOutput) const
1414 {
1415 Settings& settings = std::get<0>(settingsAndBatch);
1416 Batch& batch = std::get<1>(settingsAndBatch);
1417 DropContainer& dropContainer = std::get<2>(settingsAndBatch);
1418
1419 double sumError = 0.0;
1420 double sumWeights = 0.0; // -------------
1421
1422
1423 // ----------------------------- prepare layer data -------------------------------------
1424 size_t totalNumWeights (0);
1425 std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
1426 batch,
1427 dropContainer,
1428 itWeightBegin,
1429 itWeightEnd,
1430 itGradientBegin,
1431 itGradientEnd,
1432 totalNumWeights);
1433
1434
1435
1436 // ---------------------------------- propagate forward ------------------------------------------------------------------
1437 std::vector<double> valuesMean;
1438 std::vector<double> valuesStdDev;
1439 forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);
1440
1441
1442 // ------------- fetch output ------------------
1443 if (doFetchOutput)
1444 {
1445 fetchOutput (layerPatternData.back (), outputContainer);
1446 }
1447
1448
1449 // ------------- error computation -------------
1450 std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);
1451
1452
1453 // ------------- backpropagation -------------
1454 backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);
1455
1456
1457 // --- compile the measures
1458 double batchSize = std::distance (std::begin (batch), std::end (batch));
1459 for (auto it = itGradientBegin; it != itGradientEnd; ++it)
1460 (*it) /= batchSize;
1461
1462
1463 sumError /= sumWeights;
1464 return sumError;
1465 }
1466
1467
1468
1469/*! \brief initialization of the weights
1470 *
1471 *
1472 */
1473 template <typename OutIterator>
1474 void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
1475 {
1476 if (eInitStrategy == WeightInitializationStrategy::XAVIER)
1477 {
1478 // input and output properties
1479 int numInput = inputSize ();
1480
1481 // compute variance and mean of input and output
1482 //...
1483
1484
1485 // compute the weights
1486 for (auto& layer: layers ())
1487 {
1488 double nIn = numInput;
1489 double stdDev = sqrt (2.0/nIn);
1490 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1491 {
1492 (*itWeight) = DNN::gaussDouble (0.0, stdDev); // factor 2.0 for ReLU
1493 ++itWeight;
1494 }
1495 numInput = layer.numNodes ();
1496 }
1497 return;
1498 }
1499
1501 {
1502 // input and output properties
1503 int numInput = inputSize ();
1504
1505 // compute variance and mean of input and output
1506 //...
1507
1508
1509 // compute the weights
1510 for (auto& layer: layers ())
1511 {
1512 double nIn = numInput;
1513 double minVal = -sqrt(2.0/nIn);
1514 double maxVal = sqrt (2.0/nIn);
1515 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1516 {
1517
1518 (*itWeight) = DNN::uniformDouble (minVal, maxVal); // factor 2.0 for ReLU
1519 ++itWeight;
1520 }
1521 numInput = layer.numNodes ();
1522 }
1523 return;
1524 }
1525
1526 if (eInitStrategy == WeightInitializationStrategy::TEST)
1527 {
1528 // input and output properties
1529 int numInput = inputSize ();
1530
1531 // compute variance and mean of input and output
1532 //...
1533
1534
1535 // compute the weights
1536 for (auto& layer: layers ())
1537 {
1538// double nIn = numInput;
1539 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1540 {
1541 (*itWeight) = DNN::gaussDouble (0.0, 0.1);
1542 ++itWeight;
1543 }
1544 numInput = layer.numNodes ();
1545 }
1546 return;
1547 }
1548
1549 if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
1550 {
1551 // input and output properties
1552 int numInput = inputSize ();
1553
1554 // compute variance and mean of input and output
1555 //...
1556
1557
1558 // compute the weights
1559 for (auto& layer: layers ())
1560 {
1561 double nIn = numInput;
1562 for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1563 {
1564 (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn))); // factor 2.0 for ReLU
1565 ++itWeight;
1566 }
1567 numInput = layer.numNodes ();
1568 }
1569 return;
1570 }
1571
1572 }
1573
1574
1575
1576
1577
1578/*! \brief compute the error function
1579 *
1580 *
1581 */
// Computes the error of one pattern from the given layer data and the true values.
//
// layerData         : data of the layer the error is computed for; its values (or
//                     probabilities), its delta range and its inverse activation
//                     function are handed to the selected error function
// truth             : the true output values. NOTE: taken by value, i.e. the container
//                     is copied on every call
// itWeight,
// itWeightEnd       : weight range, only used for the weight-decay term
// patternWeight     : weight of the pattern, handed to the selected error function
// factorWeightDecay : weight-decay factor; 0 turns the weight-decay term off
// eRegularization   : kind of regularization; NONE turns the weight-decay term off
// returns           : the error, passed through weightDecay () if regularization is on
//
// NOTE(review): in this listing the `case` labels of the switch below (and one more
// line in each of the second and third branch) are not visible; the embedded line
// numbering skips 1594, 1602, 1604, 1613 and 1616. Which value of m_eErrorFunction
// selects which branch therefore has to be confirmed against the original file.
1582 template <typename Container, typename ItWeight>
1583 double Net::errorFunction (LayerData& layerData,
1584 Container truth,
1585 ItWeight itWeight,
1586 ItWeight itWeightEnd,
1587 double patternWeight,
1588 double factorWeightDecay,
1589 EnumRegularization eRegularization) const
1590 {
1591 double error (0);
1592 switch (m_eErrorFunction)
1593 {
// first branch: sum of squares, computed directly on the node values
1595 {
1596 error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth),
1597 layerData.deltasBegin (), layerData.deltasEnd (),
1598 layerData.inverseActivationFunction (),
1599 patternWeight);
1600 break;
1601 }
// second branch: cross entropy, computed on a copy of layerData.probabilities ()
1603 {
1605 std::vector<double> probabilities = layerData.probabilities ();
1606 error = crossEntropy (begin (probabilities), end (probabilities),
1607 begin (truth), end (truth),
1608 layerData.deltasBegin (), layerData.deltasEnd (),
1609 layerData.inverseActivationFunction (),
1610 patternWeight);
1611 break;
1612 }
// third branch: soft-max cross entropy, computed on a copy of layerData.probabilities ()
1614 {
// NOTE(review): writes "softmax." to stdout (and flushes) each time this branch is
// taken; this looks like left-over debug output -- confirm before relying on it
1615 std::cout << "softmax." << std::endl;
1617 std::vector<double> probabilities = layerData.probabilities ();
1618 error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
1619 begin (truth), end (truth),
1620 layerData.deltasBegin (), layerData.deltasEnd (),
1621 layerData.inverseActivationFunction (),
1622 patternWeight);
1623 break;
1624 }
1625 }
// regularization: the error computed so far is replaced by the result of weightDecay (),
// which gets that error together with the weight range
1626 if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
1627 {
1628 error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
1629 }
1630 return error;
1631 }
1632
1633
1634
1635
1636
1637
1638
1639// /*! \brief pre-training
1640// *
1641// * in development
1642// */
1643// template <typename Minimizer>
1644// void Net::preTrain (std::vector<double>& weights,
1645// std::vector<Pattern>& trainPattern,
1646// const std::vector<Pattern>& testPattern,
1647// Minimizer& minimizer, Settings& settings)
1648// {
1649// auto itWeightGeneral = std::begin (weights);
1650// std::vector<Pattern> prePatternTrain (trainPattern.size ());
1651// std::vector<Pattern> prePatternTest (testPattern.size ());
1652
1653// size_t _inputSize = inputSize ();
1654
1655// // transform pattern using the created preNet
1656// auto initializePrePattern = [&](const std::vector<Pattern>& pttrnInput, std::vector<Pattern>& pttrnOutput)
1657// {
1658// pttrnOutput.clear ();
1659// std::transform (std::begin (pttrnInput), std::end (pttrnInput),
1660// std::back_inserter (pttrnOutput),
1661// [](const Pattern& p)
1662// {
1663// Pattern pat (p.input (), p.input (), p.weight ());
1664// return pat;
1665// });
1666// };
1667
1668// initializePrePattern (trainPattern, prePatternTrain);
1669// initializePrePattern (testPattern, prePatternTest);
1670
1671// std::vector<double> originalDropFractions = settings.dropFractions ();
1672
1673// for (auto& _layer : layers ())
1674// {
1675// // compute number of weights (as a function of the number of incoming nodes)
1676// // fetch number of nodes
1677// size_t numNodes = _layer.numNodes ();
1678// size_t _numWeights = _layer.numWeights (_inputSize);
1679
1680// // ------------------
1681// DNN::Net preNet;
1682// if (!originalDropFractions.empty ())
1683// {
1684// originalDropFractions.erase (originalDropFractions.begin ());
1685// settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ());
1686// }
1687// std::vector<double> preWeights;
1688
1689// // define the preNet (pretraining-net) for this layer
1690// // outputSize == inputSize, because this is an autoencoder;
1691// preNet.setInputSize (_inputSize);
1692// preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ()));
1693// preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT));
1694// preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES);
1695// preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder)
1696
1697// // initialize weights
1698// preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM,
1699// std::back_inserter (preWeights));
1700
1701// // overwrite already existing weights from the "general" weights
1702// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ());
1703// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer
1704
1705
1706// // train the "preNet"
1707// preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings);
1708
1709// // fetch the pre-trained weights (without the output part of the autoencoder)
1710// std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral);
1711
1712// // advance the iterator on the incoming weights
1713// itWeightGeneral += _numWeights;
1714
1715// // remove the weights of the output layer of the preNet
1716// preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ());
1717
1718// // remove the outputLayer of the preNet
1719// preNet.removeLayer ();
1720
1721// // set the output size to the number of nodes in the new output layer (== last hidden layer)
1722// preNet.setOutputSize (numNodes);
1723
1724// // transform pattern using the created preNet
1725// auto proceedPattern = [&](std::vector<Pattern>& pttrn)
1726// {
1727// std::vector<Pattern> newPttrn;
1728// std::for_each (std::begin (pttrn), std::end (pttrn),
1729// [&preNet,&preWeights,&newPttrn](Pattern& p)
1730// {
1731// std::vector<double> output = preNet.compute (p.input (), preWeights);
1732// Pattern pat (output, output, p.weight ());
1733// newPttrn.push_back (pat);
1734// // p = pat;
1735// });
1736// return newPttrn;
1737// };
1738
1739
1740// prePatternTrain = proceedPattern (prePatternTrain);
1741// prePatternTest = proceedPattern (prePatternTest);
1742
1743
1744// // the new input size is the output size of the already reduced preNet
1745// _inputSize = preNet.layers ().back ().numNodes ();
1746// }
1747// }
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764 } // namespace DNN
1765} // namespace TMVA
1766
1767#endif
double
#define f(i)
Definition RSha256.hxx:104
#define g(i)
Definition RSha256.hxx:105
@ kMagenta
Definition Rtypes.h:66
@ kBlue
Definition Rtypes.h:66
char * Form(const char *fmt,...)
#define MATH_UNUSED(var)
Definition Util.h:34
Double_t(* Function)(Double_t)
Definition Functor.C:4
double weight() const
Definition Pattern.h:74
std::vector< double >::const_iterator const_iterator
Definition Pattern.h:12
std::vector< double > & output()
Definition Pattern.h:84
The Batch class encapsulates one mini-batch.
Definition NeuralNet.h:235
const_iterator begin() const
Definition NeuralNet.h:244
const_iterator end() const
Definition NeuralNet.h:245
size_t size() const
Definition NeuralNet.h:247
LayerData holds the data of one layer.
Definition NeuralNet.h:437
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition NeuralNet.h:583
iterator_type valueGradientsBegin()
returns iterator to the begin of the gradients of the node values
Definition NeuralNet.h:597
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition NeuralNet.h:591
ModeOutputValues outputMode() const
returns the output mode
Definition NeuralNet.h:588
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition NeuralNet.h:608
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition NeuralNet.h:592
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition NeuralNet.h:607
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition NeuralNet.h:589
DropContainer::const_iterator const_dropout_iterator
Definition NeuralNet.h:448
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition NeuralNet.h:582
void forwardBatch(const LayerContainer &_layers, LayerPatternContainer &layerPatternData, std::vector< double > &valuesMean, std::vector< double > &valuesStdDev, size_t trainFromLayer) const
bool * fExitFromTraining
Definition NeuralNet.h:1277
std::vector< Layer > m_layers
layer-structure-data
Definition NeuralNet.h:1272
UInt_t * fIPyMaxIter
Definition NeuralNet.h:1278
std::vector< double > compute(const std::vector< double > &input, const Weights &weights) const
compute the net with the given input and the given weights
void fetchOutput(const LayerData &lastLayerData, OutputContainer &outputContainer) const
size_t inputSize() const
input size of the DNN
Definition NeuralNet.h:1098
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition NeuralNet.h:1269
double train(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
start the training
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition NeuralNet.h:1245
std::vector< std::vector< LayerData > > prepareLayerData(LayerContainer &layers, Batch &batch, const DropContainer &dropContainer, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t &totalNumWeights) const
void initializeWeights(WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
initialize the weights with the given strategy
size_t outputSize() const
output size of the DNN
Definition NeuralNet.h:1099
double errorFunction(LayerData &layerData, Container truth, ItWeight itWeight, ItWeight itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const
computes the error of the DNN
double forward_backward(LayerContainer &layers, PassThrough &settingsAndBatch, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t trainFromLayer, OutContainer &outputContainer, bool fetchOutput) const
main NN computation function
double trainCycle(Minimizer &minimizer, std::vector< double > &weights, Iterator itPatternBegin, Iterator itPatternEnd, Settings &settings, DropContainer &dropContainer)
executes one training cycle
UInt_t * fIPyCurrentIter
Definition NeuralNet.h:1278
double operator()(PassThrough &settingsAndBatch, const Weights &weights) const
execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradient...
void dropOutWeightFactor(WeightsType &weights, const DropProbabilities &drops, bool inverse=false)
set the drop out configuration
void fillDropContainer(DropContainer &dropContainer, double dropFraction, size_t numNodes) const
prepare the drop-out-container (select the nodes which are to be dropped out)
size_t numWeights(size_t trainingStartLayer=0) const
returns the number of weights in this net
IPythonInteractive * fInteractive
Definition NeuralNet.h:1276
std::tuple< double, double > computeError(const Settings &settings, std::vector< LayerData > &lastLayerData, Batch &batch, ItWeight itWeightBegin, ItWeight itWeightEnd) const
void forwardPattern(const LayerContainer &_layers, std::vector< LayerData > &layerData) const
void backPropagate(std::vector< std::vector< LayerData > > &layerPatternData, const Settings &settings, size_t trainFromLayer, size_t totalNumWeights) const
Settings for the training of the neural net.
Definition NeuralNet.h:730
bool useMultithreading() const
is multithreading turned on?
Definition NeuralNet.h:815
EnumRegularization regularization() const
some regularization of the DNN is turned on?
Definition NeuralNet.h:813
size_t convergenceCount() const
returns the current convergence count
Definition NeuralNet.h:827
size_t testRepetitions() const
how often is the test data tested
Definition NeuralNet.h:768
virtual void endTestCycle()
callback for monitoring and loggging
Definition NeuralNet.h:805
virtual void testIteration()
callback for monitoring and loggging
Definition NeuralNet.h:806
virtual bool hasConverged(double testError)
has this training converged already?
virtual void cycle(double progress, TString text)
Definition NeuralNet.h:799
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition NeuralNet.h:788
const std::vector< double > & dropFractions() const
Definition NeuralNet.h:762
void addPoint(std::string histoName, double x)
for monitoring
Definition NeuralNet.h:821
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition NeuralNet.h:781
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition NeuralNet.h:823
virtual void startTrainCycle()
Definition NeuralNet.h:782
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition NeuralNet.h:766
double factorWeightDecay() const
get the weight-decay factor
Definition NeuralNet.h:769
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition NeuralNet.h:828
void pads(int numPads)
preparation for monitoring
Definition NeuralNet.h:818
size_t batchSize() const
mini-batch size
Definition NeuralNet.h:767
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and loggging
Definition NeuralNet.h:809
size_t dropRepetitions() const
Definition NeuralNet.h:761
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition NeuralNet.h:819
virtual void startTestCycle()
callback for monitoring and loggging
Definition NeuralNet.h:804
double m_beta
internal parameter (momentum)
Definition NeuralNet.h:372
std::vector< double > m_localGradients
local gradients for reuse in thread.
Definition NeuralNet.h:376
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition NeuralNet.h:373
double m_alpha
internal parameter (learningRate)
Definition NeuralNet.h:371
std::vector< double > m_localWeights
local weights for reuse in thread.
Definition NeuralNet.h:375
double operator()(Function &fitnessFunction, Weights &weights, PassThrough &passThrough)
operator to call the steepest gradient descent algorithm
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in the two-TGraph case; it adds new data points to the graphs.
Basic string class.
Definition TString.h:136
const Int_t n
Definition legend1.C:16
std::shared_ptr< std::function< double(double)> > InvGauss
Definition NeuralNet.cxx:14
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
double uniformDouble(double minValue, double maxValue)
Definition NeuralNet.cxx:43
std::shared_ptr< std::function< double(double)> > SymmReLU
Definition NeuralNet.cxx:30
std::shared_ptr< std::function< double(double)> > TanhShift
Definition NeuralNet.cxx:31
std::shared_ptr< std::function< double(double)> > Tanh
Definition NeuralNet.cxx:29
std::shared_ptr< std::function< double(double)> > InvSigmoid
Definition NeuralNet.cxx:18
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
T uniformFromTo(T from, T to)
Definition NeuralNet.icc:34
double computeRegularization< EnumRegularization::L1 >(double weight, const double &factorWeightDecay)
std::shared_ptr< std::function< double(double)> > SoftPlus
Definition NeuralNet.cxx:27
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
std::shared_ptr< std::function< double(double)> > ZeroFnc
Definition NeuralNet.cxx:28
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
std::shared_ptr< std::function< double(double)> > InvSoftSign
Definition NeuralNet.cxx:20
std::shared_ptr< std::function< double(double)> > InvGaussComplement
Definition NeuralNet.cxx:15
double computeRegularization< EnumRegularization::L2 >(double weight, const double &factorWeightDecay)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition Functions.h:238
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max-cross-entropy error function (for mutual exclusive cross-entropy)
std::shared_ptr< std::function< double(double)> > InvTanh
Definition NeuralNet.cxx:22
std::shared_ptr< std::function< double(double)> > Linear
Definition NeuralNet.cxx:24
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition NeuralNet.h:1050
std::shared_ptr< std::function< double(double)> > InvReLU
Definition NeuralNet.cxx:17
std::shared_ptr< std::function< double(double)> > GaussComplement
Definition NeuralNet.cxx:13
std::shared_ptr< std::function< double(double)> > Gauss
Definition NeuralNet.cxx:12
std::shared_ptr< std::function< double(double)> > Sigmoid
Definition NeuralNet.cxx:26
double gaussDouble(double mean, double sigma)
Definition NeuralNet.cxx:35
std::shared_ptr< std::function< double(double)> > SoftSign
Definition NeuralNet.cxx:32
std::shared_ptr< std::function< double(double)> > InvSoftPlus
Definition NeuralNet.cxx:19
std::shared_ptr< std::function< double(double)> > ReLU
Definition NeuralNet.cxx:25
double computeRegularization(double weight, const double &factorWeightDecay)
compute the regularization (L1, L2)
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition NeuralNet.h:1294
bool isFlagSet(T flag, T value)
Definition NeuralNet.h:212
std::shared_ptr< std::function< double(double)> > InvTanhShift
Definition NeuralNet.cxx:23
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
std::vector< char > DropContainer
Definition NeuralNet.h:227
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)
std::shared_ptr< std::function< double(double)> > InvSymmReLU
Definition NeuralNet.cxx:21
std::shared_ptr< std::function< double(double)> > InvLinear
Definition NeuralNet.cxx:16
create variable transformations
static void output(int code)
Definition gifencode.c:226