#ifndef TMVA_NEURAL_NET_I
#define TMVA_NEURAL_NET_I

#ifndef TMVA_NEURAL_NET
#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
#endif // TMVA_NEURAL_NET

#pragma GCC diagnostic ignored "-Wunused-variable"

template <typename T>
T uniformFromTo (T from, T to)
{
    return from + (rand ()* (to - from)/RAND_MAX);
}
template <typename Container, typename T>
// ...
    for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
static std::shared_ptr<std::function<double(double)>> ZeroFnc = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 0; });

static std::shared_ptr<std::function<double(double)>> Sigmoid = std::make_shared<std::function<double(double)>> ([](double value){ value = std::max (-100.0, std::min (100.0, value)); return 1.0/(1.0 + std::exp (-value)); });
static std::shared_ptr<std::function<double(double)>> InvSigmoid = std::make_shared<std::function<double(double)>> ([](double value){ double s = (*Sigmoid.get ()) (value); return s*(1.0-s); });

static std::shared_ptr<std::function<double(double)>> Tanh = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value); });
static std::shared_ptr<std::function<double(double)>> InvTanh = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 - std::pow (value, 2.0); });

static std::shared_ptr<std::function<double(double)>> Linear = std::make_shared<std::function<double(double)>> ([](double value){ return value; });
static std::shared_ptr<std::function<double(double)>> InvLinear = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 1.0; });

static std::shared_ptr<std::function<double(double)>> SymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? value-margin : value < -margin ? value+margin : 0; });
static std::shared_ptr<std::function<double(double)>> InvSymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? 1.0 : value < -margin ? 1.0 : 0; });

static std::shared_ptr<std::function<double(double)>> ReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? value-margin : 0; });
static std::shared_ptr<std::function<double(double)>> InvReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? 1.0 : 0; });

static std::shared_ptr<std::function<double(double)>> SoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return std::log (1.0 + std::exp (value)); });
static std::shared_ptr<std::function<double(double)>> InvSoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 / (1.0 + std::exp (-value)); });

static std::shared_ptr<std::function<double(double)>> TanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value-0.3); });
static std::shared_ptr<std::function<double(double)>> InvTanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return 0.3 + (1.0 - std::pow (value, 2.0)); });

static std::shared_ptr<std::function<double(double)>> SoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return value / (1.0 + fabs (value)); });
static std::shared_ptr<std::function<double(double)>> InvSoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return std::pow ((1.0 - fabs (value)), 2.0); });

static std::shared_ptr<std::function<double(double)>> Gauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return exp (-std::pow (value*s, 2.0)); });
static std::shared_ptr<std::function<double(double)>> InvGauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return -2.0 * value * s*s * (*Gauss.get ()) (value); });

static std::shared_ptr<std::function<double(double)>> GaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return 1.0 - exp (-std::pow (value*s, 2.0)); });
static std::shared_ptr<std::function<double(double)>> InvGaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return +2.0 * value * s*s * (*GaussComplement.get ()) (value); });
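// Note: the "Inv..." functions above hold the derivatives of the corresponding activation
// functions and are evaluated on the already-activated node value (see applyFunctions below,
// which stores (*invFnc)(value) after value has been transformed by fnc). For instance,
// with t = tanh(x), (*InvTanh)(t) == 1 - t*t, which is exactly d/dx tanh(x).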
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
    {
        for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
        {
            if (!HasDropOut || *itDrop)
                (*itTarget) += (*itSource) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
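// applyWeights implements the forward pass of one fully connected layer: each target node
// accumulates source-value * weight over all source nodes, and when HasDropOut is set the
// drop-out mask (one flag per source node) suppresses the contribution of dropped nodes.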
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
    {
        for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
        {
            if (!HasDropOut || *itDrop)
                (*itPrev) += (*itCurr) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
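// applyWeightsBackwards propagates the deltas of the current layer back to the previous
// layer through the same weights used in the forward pass (the transposed multiplication);
// the drop-out mask here refers to the nodes of the previous layer.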
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        ++itValue;
    }
}
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        (*itGradient) = (*invFnc.get ()) (value);

        ++itValue; ++itGradient;
    }
}
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
            ++itTargetDelta; ++itTargetGradient; ++itGradient;
        }
        ++itSource;
    }
}
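// update accumulates the weight gradients: for the weight connecting source node i to
// target node j, the gradient entry is decremented by  delta_j * source_i * g'_j,
// with delta_j the back-propagated error and g'_j the stored activation derivative.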
template <EnumRegularization Regularization>
inline double computeRegularization (double weight, const double& factorWeightDecay);

// specialization for L1 regularization
template <>
inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
{
    return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
}

// specialization for L2 regularization
template <>
inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
{
    return factorWeightDecay * weight;
}
template <EnumRegularization Regularization,
          typename ItSource,
          typename ItDelta,
          typename ItTargetGradient,
          typename ItGradient,
          typename ItWeight>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient,
             ItWeight itWeight, double weightDecay)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight, weightDecay);
            ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
        }
        ++itSource;
    }
}
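// The regularized variant additionally subtracts the regularization gradient of the current
// weight: sign(w)*decay for L1 and decay*w for L2 (see computeRegularization above), so a
// larger decay factor pushes the weights toward zero during training.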
#define USELOCALWEIGHTS 1

/*! \brief operator to call the steepest gradient descent algorithm
 */
template <typename Function, typename Weights, typename PassThrough>
double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
{
    size_t numWeights = weights.size ();
    // ...
    size_t currentRepetition = 0;
    // ...
    // apply the remembered gradients of the previous step to the local working copy of the weights
    for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
    {
        (*itLocWeight) += (*itPrevG);
    }
    // ...
    double maxGrad = 0.0;
    for (; itG != itGEnd; ++itG, ++itPrevG)
    {
        double currGrad = (*itG);
        double prevGrad = (*itPrevG);
        // ...
        currGrad += prevGrad;
        // ...
        (*itPrevG) = currGrad;
        // ...
    }
    // ...
    // large gradients trigger a reduction of the learning rate
    std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
    std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
    // ...
    auto itW = std::begin (weights);
    // ...
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd,
                     ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                     ItDelta itDelta, ItDelta itDeltaEnd,
                     InvFnc invFnc, double patternWeight)
{
    double errorSum = 0.0;

    // sum of squares error function
    ItTruth itTruth = itTruthBegin;
    bool hasDeltas = (itDelta != itDeltaEnd);
    for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
    {
        double output = (*itOutput);
        double error = output - (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
            ++itDelta;
        }
        errorSum += error*error * patternWeight;
    }

    return errorSum;
}
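// sumOfSquares returns the weighted squared-error sum  E = w_p * sum_k (o_k - t_k)^2  and,
// when a delta range is provided, fills the deltas  delta_k = f'(o_k) * (o_k - t_k) * w_p
// that seed the back-propagation (w_p is the pattern weight, f' the inverse activation).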
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd,
                     ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                     ItDelta itDelta, ItDelta itDeltaEnd,
                     ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    bool hasDeltas = (itDelta != itDeltaEnd);

    double errorSum = 0.0;
    for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
    {
        double probability = *itProbability;
        double truth = *itTruthBegin;
        // clamp the truth value to 0.1 / 0.9 (soft targets)
        truth = truth < 0.5 ? 0.1 : 0.9;
        if (hasDeltas)
        {
            double delta = probability - truth;
            (*itDelta) = delta*patternWeight;
            ++itDelta;
        }
        double error (0);
        if (probability == 0)
        {
            // ... (degenerate case: probability exactly 0)
        }
        else if (probability == 1)
        {
            // ... (degenerate case: probability exactly 1)
        }
        else
            error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability));
        errorSum += error * patternWeight;
    }
    return errorSum;
}
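// crossEntropy implements the (per-output, two-class) cross-entropy
//     E = -w_p * sum_k [ t_k*log(p_k) + (1-t_k)*log(1-p_k) ]
// with deltas  delta_k = (p_k - t_k) * w_p.  The clamping of the truth value to 0.1 / 0.9
// above softens the targets before the logarithms are evaluated.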
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd,
                            ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                            ItDelta itDelta, ItDelta itDeltaEnd,
                            ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    double errorSum = 0.0;

    bool hasDeltas = (itDelta != itDeltaEnd);
    // soft-max-cross-entropy error function (for mutually exclusive classes)
    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        double probability = (*itProbability);
        double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;
            ++itDelta;
        }
        double error (0);
        error += truth * log (probability);
        errorSum += error;
    }

    return -errorSum * patternWeight;
}
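// softMaxCrossEntropy is the multi-class variant for mutually exclusive outputs:
//     E = -w_p * sum_k t_k * log(p_k),   delta_k = p_k - t_k
// where the p_k are the soft-max probabilities of the output layer.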
template <typename ItWeight>
double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
{
    if (eRegularization == EnumRegularization::L1)
    {
        // L1 regularization: add the mean absolute weight to the error
        double w = 0; size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += std::fabs (weight);
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else if (eRegularization == EnumRegularization::L2)
    {
        // L2 regularization: add the mean squared weight to the error
        double w = 0; size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += weight*weight;
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    return error;
}
template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                            currLayerData.weightsBegin (),
                            currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                            prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                             currLayerData.weightsBegin (),
                             currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                             &dummy); // dummy drop-out flag; unused when HasDropOut == false
    }
}
template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                     currLayerData.weightsBegin (),
                                     prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                     prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                      currLayerData.weightsBegin (),
                                      prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                      &dummy); // dummy drop-out flag; unused when HasDropOut == false
    }
}
template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData,
             double factorWeightDecay, EnumRegularization regularization)
{
    // accumulate the gradients, optionally with L1 or L2 regularization
    if (factorWeightDecay != 0.0)
    {
        if (regularization == EnumRegularization::L1)
        {
            update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else if (regularization == EnumRegularization::L2)
        {
            update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else
        {
            update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                    currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                    currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
        }
    }
    else
    {
        update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
    }
}
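// forward / backward / update are the three per-layer building blocks used by the Net:
// forward propagates node values, backward propagates deltas, and update accumulates the
// weight gradients, optionally with the L1 or L2 weight decay selected above.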
/*! \brief set the drop-out configuration: rescale the weights by the keep probabilities
 */
template <typename WeightsType, typename DropProbabilities>
void Net::dropOutWeightFactor (WeightsType& weights,
                               const DropProbabilities& drops,
                               bool inverse)
{
    if (drops.empty () || weights.empty ())
        return;

    auto itWeight = std::begin (weights);
    auto itWeightEnd = std::end (weights);
    auto itDrop = std::begin (drops);
    auto itDropEnd = std::end (drops);
    size_t numNodesPrev = inputSize ();
    double dropFractionPrev = *itDrop;
    ++itDrop;

    for (auto& layer : layers ())
    {
        if (itDrop == itDropEnd)
            break;

        size_t _numNodes = layer.numNodes ();

        double dropFraction = *itDrop;
        double pPrev = 1.0 - dropFractionPrev;
        double p = 1.0 - dropFraction;
        p *= pPrev;
        if (inverse)
            p = 1.0/p;

        size_t _numWeights = layer.numWeights (numNodesPrev);
        for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
        {
            if (itWeight == itWeightEnd)
                break;

            *itWeight *= p;
            ++itWeight;
        }
        numNodesPrev = _numNodes;
        dropFractionPrev = dropFraction;
        ++itDrop;
    }
}
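// dropOutWeightFactor rescales the weights at the boundary between drop-out training and
// testing: each weight is multiplied by a factor built from the keep probabilities
// (1 - dropFraction) of the adjacent layers, and with inverse == true the scaling is undone
// before the next drop-out training cycle (see its use in train() below).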
/*! \brief start the training
 */
template <typename Minimizer>
double Net::train (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer,
                   Settings& settings)
{
    // ...
    if (fIPyMaxIter) *fIPyMaxIter = 100;

    // histograms for monitoring the training and test errors
    settings.create ("trainErrors", 100, 0, 100, 100, 0, 1);
    settings.create ("testErrors",  100, 0, 100, 100, 0, 1);

    size_t cycleCount = 0;
    size_t testCycleCount = 0;
    double testError = 1e20;
    double trainError = 1e20;
    size_t dropOutChangeCount = 0;

    // ...
    const std::vector<double>& dropFractions = settings.dropFractions ();
    bool isWeightsForDrop = false;
    // main training loop (the loop header itself is not part of this excerpt)
    // ...
    size_t dropIndex = 0;
    if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
    {
        // fill the drop-out container with a new random mask
        dropContainer.clear ();
        size_t _numNodes = inputSize ();
        double dropFraction = 0.0;
        dropFraction = dropFractions.at (dropIndex);
        // ...
        fillDropContainer (dropContainer, dropFraction, _numNodes);
        for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
        {
            auto& layer = *itLayer;
            _numNodes = layer.numNodes ();
            // ...
            if (dropFractions.size () > dropIndex)
                dropFraction = dropFractions.at (dropIndex);
            // ...
            fillDropContainer (dropContainer, dropFraction, _numNodes);
        }
        isWeightsForDrop = true;
    }

    // execute one training cycle on the training patterns
    trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);

    // ...
    bool hasConverged = false;
    // ...
    if (isWeightsForDrop)
    {
        // adjust the weights so that the full (non-dropped) network can be used for testing
        dropOutWeightFactor (weights, dropFractions);
        isWeightsForDrop = false;
    }
    // ...
    // check the net on the test patterns, distributing the test batches over several threads
    size_t numThreads = std::thread::hardware_concurrency ();
    size_t patternPerThread = testPattern.size () / numThreads;
    std::vector<Batch> batches;
    auto itPat = testPattern.begin ();
    // ...
    for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
    {
        batches.push_back (Batch (itPat, itPat + patternPerThread));
        itPat += patternPerThread;
    }
    if (itPat != testPattern.end ())
        batches.push_back (Batch (itPat, testPattern.end ()));

    // execute each test batch on its own thread
    std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
    for (auto& batch : batches)
    {
        // ...
        futures.push_back (
            std::async (std::launch::async, [&]()
            {
                std::vector<double> localOutput;
                // ...
                double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
                return std::make_tuple (testBatchError, localOutput);
            }));
    }

    auto itBatch = batches.begin ();
    for (auto& f : futures)
    {
        std::tuple<double,std::vector<double>> result = f.get ();
        testError += std::get<0>(result) / batches.size ();
        std::vector<double> output = std::get<1>(result);
        if (output.size() == (outputSize() - 1) * itBatch->size())
        {
            auto output_iterator = output.begin();
            for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
            {
                for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                {
                    settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                         (*pattern_it).weight ());
                    // ...
                }
            }
        }
        // ...
    }
    // ... (single-threaded path)
    std::vector<double> output;
    // ...
    Batch batch (begin (testPattern), end (testPattern));
    // ...
    if (output.size() == (outputSize() - 1) * batch.size())
    {
        auto output_iterator = output.begin();
        for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
        {
            for (size_t output_index = 1; output_index < outputSize(); ++output_index)
            {
                settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                     (*pattern_it).weight ());
                // ...
            }
        }
    }
    testError += testPatternError;
    // ...
    if (!hasConverged && !isWeightsForDrop)
    {
        // prepare the weights for the next drop-out training cycle (inverse scaling)
        dropOutWeightFactor (weights, dropFractions, true);
        isWeightsForDrop = true;
    }

    ++dropOutChangeCount;

    // ...
    static double x = -1.0;
    // ...
    settings.addPoint ("trainErrors", cycleCount, trainError);
    settings.addPoint ("testErrors",  cycleCount, testError);
    settings.plot ("trainErrors", "C", 1, kBlue);
    // ...
    fInteractive->AddPoint (cycleCount, trainError, testError);
    if (*fExitFromTraining)
        break;
    // ...
    if ((int)cycleCount % 10 == 0) {
        // ...
        TString convText = Form ("(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
                                 trainError, testError, (int)cycleCount,
                                 (int)settings.convergenceCount (), (int)settings.maxConvergenceCount ());
        // ...
        settings.cycle (progress, convText);
    }
    // ...
    TString convText = Form ("(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
    // ...
    settings.cycle (progress, convText);
    // ...
}
/*! \brief executes one training cycle
 */
template <typename Iterator, typename Minimizer>
double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                        Iterator itPatternBegin, Iterator itPatternEnd,
                        Settings& settings, DropContainer& dropContainer)
{
    double error = 0.0;
    size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
    size_t numBatches = numPattern/settings.batchSize ();
    size_t numBatches_stored = numBatches;

    std::shuffle (itPatternBegin, itPatternEnd, std::default_random_engine{});
    Iterator itPatternBatchBegin = itPatternBegin;
    Iterator itPatternBatchEnd = itPatternBatchBegin;

    // create the mini-batches
    std::vector<Batch> batches;
    while (numBatches > 0)
    {
        std::advance (itPatternBatchEnd, settings.batchSize ());
        batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
        itPatternBatchBegin = itPatternBatchEnd;
        --numBatches;
    }

    // put the remaining patterns into a final (smaller) batch
    if (itPatternBatchEnd != itPatternEnd)
        batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));

    // ... (multi-threaded path)
    size_t numThreads = std::thread::hardware_concurrency ();
    size_t batchesPerThread = batches.size () / numThreads;
    typedef std::vector<Batch>::iterator batch_iterator;
    std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
    batch_iterator itBatchBegin = std::begin (batches);
    batch_iterator itBatchCurrEnd = std::begin (batches);
    batch_iterator itBatchEnd = std::end (batches);
    for (size_t iT = 0; iT < numThreads; ++iT)
    {
        if (iT == numThreads-1)
            itBatchCurrEnd = itBatchEnd;
        else
            std::advance (itBatchCurrEnd, batchesPerThread);
        batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
        itBatchBegin = itBatchCurrEnd;
    }

    // execute each batch range on a separate thread
    std::vector<std::future<double>> futures;
    for (auto& batchRange : batchVec)
    {
        // ...
        futures.push_back (
            std::async (std::launch::async, [&]()
            {
                double localError = 0.0;
                for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
                {
                    Batch& batch = *it;
                    pass_through_type settingsAndBatch (settings, batch, dropContainer);
                    Minimizer minimizerClone (minimizer);
                    localError += minimizerClone ((*this), weights, settingsAndBatch);
                }
                return localError;
            }));
    }

    for (auto& f : futures)
        error += f.get ();

    // ... (single-threaded path)
    for (auto& batch : batches)
    {
        std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
        error += minimizer ((*this), weights, settingsAndBatch);
    }

    numBatches_stored = std::max (numBatches_stored, size_t(1));
    error /= numBatches_stored;
    // ...
    return error;
}
/*! \brief compute the net with the given input and the given weights
 */
template <typename Weights>
std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
{
    std::vector<LayerData> layerData;
    layerData.reserve (m_layers.size ()+1);
    auto itWeight = begin (weights);
    auto itInputBegin = begin (input);
    auto itInputEnd = end (input);
    layerData.push_back (LayerData (itInputBegin, itInputEnd));
    size_t numNodesPrev = input.size ();

    // prepare one LayerData per layer, pointing into the flat weight vector
    for (auto& layer: m_layers)
    {
        layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                        layer.activationFunction (),
                                        layer.modeOutputValues ()));
        size_t _numWeights = layer.numWeights (numNodesPrev);
        itWeight += _numWeights;
        numNodesPrev = layer.numNodes ();
    }

    // forward propagation
    forwardPattern (m_layers, layerData);

    // fetch the output of the last layer
    std::vector<double> output;
    fetchOutput (layerData.back (), output);
    return output;
}
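/*! A minimal usage sketch for compute(): assuming a Net that has already been set up with
 *  layers and a matching flat weight vector (the setup calls are not shown here),
 *  \code
 *  std::vector<double> input  = { 0.5, -1.2, 3.0 };              // one input pattern
 *  std::vector<double> output = net.compute (input, weights);    // forward pass only
 *  \endcode
 *  returns the values (or probabilities, depending on the output mode) of the last layer.
 */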
/*! \brief execute computation of the DNN for one mini-batch (used by the minimizer);
 *         no computation of gradients
 */
template <typename Weights, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
{
    // an empty gradient range and a huge "trainFromLayer" switch off the back-propagation
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (nothing), std::end (nothing),
                                     10000, nothing, false);
    return error;
}
template <typename Weights, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights,
                        ModeOutput eFetch, OutContainer& outputContainer) const
{
    // as above, but additionally fetch the output of the last layer into outputContainer
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (nothing), std::end (nothing),
                                     10000, outputContainer, true);
    return error;
}
template <typename Weights, typename Gradients, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
{
    // compute the DNN for one mini-batch and accumulate the gradients (back-propagation enabled)
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients),
                                     0, nothing, false);
    return error;
}
template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients,
                        ModeOutput eFetch, OutContainer& outputContainer) const
{
    // compute the gradients and additionally fetch the output of the last layer
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients),
                                     0, outputContainer, true);
    return error;
}
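// The four operator() overloads above all delegate to forward_backward and differ only in
// two switches: whether a gradient range is passed (an empty "nothing" vector together with
// trainFromLayer = 10000 disables back-propagation) and whether the output of the last layer
// is fetched into an output container (the ModeOutput / outputContainer variants).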
/*! \brief prepare the LayerData structures for all layers and all patterns of one batch
 */
template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
                                                           Batch& batch,
                                                           const DropContainer& dropContainer,
                                                           ItWeight itWeightBegin,
                                                           ItWeight /*itWeightEnd*/,
                                                           ItGradient itGradientBegin,
                                                           ItGradient itGradientEnd,
                                                           size_t& totalNumWeights) const
{
    bool usesDropOut = !dropContainer.empty ();
    const_dropout_iterator itDropOut;
    if (usesDropOut)
        itDropOut = std::begin (dropContainer);

    if (_layers.empty ())
        throw std::string ("no layers in this net");

    // ...
    totalNumWeights = 0;
    size_t totalNumNodes = 0;
    std::vector<std::vector<LayerData>> layerPatternData;
    layerPatternData.reserve (_layers.size ()+1);
    ItWeight itWeight = itWeightBegin;
    ItGradient itGradient = itGradientBegin;
    size_t numNodesPrev = inputSize ();
    // ...

    // --------------------- prepare layer data for the input layer ----------------------------
    layerPatternData.push_back (std::vector<LayerData>());
    for (const Pattern& _pattern : batch)
    {
        std::vector<LayerData>& layerData = layerPatternData.back ();
        layerData.push_back (LayerData (numNodesPrev));

        auto itInputBegin = _pattern.beginInput ();
        auto itInputEnd = _pattern.endInput ();
        layerData.back ().setInput (itInputBegin, itInputEnd);

        if (usesDropOut)
            layerData.back ().setDropOut (itDropOut);
    }
    // ...
    if (usesDropOut)
        itDropOut += _layers.back ().numNodes ();

    // ---------------- prepare layer data for all following layers ----------------------------
    for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
    {
        bool isOutputLayer = (itLayer+1 == itLayerEnd);
        bool isFirstHiddenLayer = (itLayer == begin (_layers));

        auto& layer = *itLayer;
        layerPatternData.push_back (std::vector<LayerData>());
        // ...
        for (const Pattern& _pattern : batch)
        {
            std::vector<LayerData>& layerData = layerPatternData.back ();
            // ...
            if (itGradientBegin == itGradientEnd)
            {
                // no gradients requested --> inference-only layer data
                layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                                layer.activationFunction (),
                                                layer.modeOutputValues ()));
            }
            else
            {
                // training --> additionally reference the gradients and the inverse activation
                layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
                                                layer.activationFunction (),
                                                layer.inverseActivationFunction (),
                                                layer.modeOutputValues ()));
            }

            if (usesDropOut)
                layerData.back ().setDropOut (itDropOut);
        }
        // ...
        if (usesDropOut)
            itDropOut += layer.numNodes ();

        size_t _numWeights = layer.numWeights (numNodesPrev);
        totalNumWeights += _numWeights;
        itWeight += _numWeights;
        itGradient += _numWeights;
        numNodesPrev = layer.numNodes ();
        totalNumNodes += numNodesPrev;
    }
    assert (totalNumWeights > 0);
    return layerPatternData;
}
template <typename LayerContainer>
void Net::forwardPattern (const LayerContainer& _layers,
                          std::vector<LayerData>& layerData) const
{
    size_t idxLayer = 0, idxLayerEnd = _layers.size ();
    size_t cumulativeNodeCount = 0;
    for (; idxLayer < idxLayerEnd; ++idxLayer)
    {
        LayerData& prevLayerData = layerData.at (idxLayer);
        LayerData& currLayerData = layerData.at (idxLayer+1);

        forward (prevLayerData, currLayerData);
        // ... (apply the layer's activation function to the freshly computed node values)
    }
}
template <typename LayerContainer, typename LayerPatternContainer>
void Net::forwardBatch (const LayerContainer& _layers,
                        LayerPatternContainer& layerPatternData,
                        std::vector<double>& valuesMean,
                        std::vector<double>& valuesStdDev,
                        size_t trainFromLayer) const
{
    valuesMean.clear ();
    valuesStdDev.clear ();

    // ...
    size_t cumulativeNodeCount = 0;
    for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
    {
        bool doTraining = idxLayer >= trainFromLayer;

        // get the layer data for the currently looked-at layer and its predecessor
        std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
        std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);

        size_t numPattern = prevLayerPatternData.size ();
        size_t numNodesLayer = _layers.at (idxLayer).numNodes ();

        std::vector<MeanVariance> means (numNodesLayer);

        // propagate each pattern of the batch through this layer
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            forward (prevLayerData, currLayerData);
        }

        // apply the activation functions (and, when training, store the activation gradients)
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);
            // ...
        }

        // ...
        cumulativeNodeCount += numNodesLayer;
    }
}
template <typename OutputContainer>
void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
{
    // depending on the output mode, either the raw node values are returned ...
    // ...
    outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
    // ... or the probabilities computed from them (sigmoid / soft-max output modes)
    // ...
    auto prob = lastLayerData.probabilities ();
    outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ());
    // ...
}

template <typename OutputContainer>
void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
{
    for (const LayerData& lastLayerData : lastLayerPatternData)
        fetchOutput (lastLayerData, outputContainer);
}
template <typename ItWeight>
std::tuple<double,double> Net::computeError (const Settings& settings,
                                             std::vector<LayerData>& lastLayerData,
                                             Batch& batch,
                                             ItWeight itWeightBegin,
                                             ItWeight itWeightEnd) const
{
    typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();
    // ...
    typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
    typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();

    double sumWeights (0.0);
    double sumError (0.0);

    size_t idxPattern = 0;
    // loop over the patterns of the batch and accumulate error and pattern weights
    for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
    {
        ++idxPattern;
        // compute E and the deltas of the computed output and the true output
        LayerData& layerData = (*itLayerData);
        const Pattern& _pattern = (*itPattern);
        double error = errorFunction (layerData, _pattern.output (),
                                      itWeightBegin, itWeightEnd,
                                      _pattern.weight (), settings.factorWeightDecay (),
                                      settings.regularization ());
        sumWeights += fabs (_pattern.weight ());
        sumError += error;
    }
    return std::make_tuple (sumError, sumWeights);
}
template <typename Settings>
void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                         const Settings& settings,
                         size_t trainFromLayer,
                         size_t totalNumWeights) const
{
    bool doTraining = layerPatternData.size () > trainFromLayer;
    if (!doTraining)
        return;

    // ------------- back propagation -------------
    size_t idxLayer = layerPatternData.size ();
    for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
         itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
    {
        --idxLayer;
        if (idxLayer <= trainFromLayer) // no training requested for the layers below
            break;

        std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
        std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);

        size_t idxPattern = 0;
        // loop over the patterns of the batch
        for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
                 itPrevLayerData = begin (prevLayerDataColl);
             itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
        {
            LayerData& currLayerData = (*itCurrLayerData);
            LayerData& prevLayerData = *(itPrevLayerData);

            backward (prevLayerData, currLayerData);
            // ... (gradient accumulation via the per-layer update function)
        }
    }
}
/*! \brief main NN computation function: forward propagation, error computation and
 *         (optionally) back propagation for one mini-batch
 */
template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
                              ItWeight itWeightBegin, ItWeight itWeightEnd,
                              ItGradient itGradientBegin, ItGradient itGradientEnd,
                              size_t trainFromLayer,
                              OutContainer& outputContainer, bool doFetchOutput) const
{
    Settings& settings = std::get<0>(settingsAndBatch);
    Batch& batch = std::get<1>(settingsAndBatch);
    DropContainer& dropContainer = std::get<2>(settingsAndBatch);

    double sumError = 0.0;
    double sumWeights = 0.0;

    // ----------------------------- prepare the layer data -----------------------------------
    size_t totalNumWeights (0);
    std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
                                                                             batch,
                                                                             dropContainer,
                                                                             itWeightBegin,
                                                                             itWeightEnd,
                                                                             itGradientBegin,
                                                                             itGradientEnd,
                                                                             totalNumWeights);

    // --------------------------------- propagate forward ------------------------------------
    std::vector<double> valuesMean;
    std::vector<double> valuesStdDev;
    forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);

    // ------------- fetch output ------------------
    if (doFetchOutput)
        fetchOutput (layerPatternData.back (), outputContainer);

    // ------------- error computation -------------
    std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);

    // ------------- back propagation --------------
    backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);

    // ------------- normalize the gradients by the batch size -------------
    double batchSize = std::distance (std::begin (batch), std::end (batch));
    for (auto it = itGradientBegin; it != itGradientEnd; ++it)
        (*it) /= batchSize;

    sumError /= sumWeights;
    return sumError;
}
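// forward_backward is the central computation: it unpacks (Settings, Batch, DropContainer)
// from the pass-through tuple, prepares per-pattern LayerData, runs the forward pass,
// optionally fetches the output, computes the error (and deltas) on the last layer,
// back-propagates, and finally normalizes the accumulated gradients by the batch size.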
/*! \brief initialize the weights with the given strategy; one of the schemes below is
 *         chosen according to eInitStrategy (the dispatch itself is not shown)
 */
template <typename OutIterator>
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
{
    // gaussian "Xavier" initialization: width sqrt(2/nIn)
    {
        int numInput = inputSize ();
        // ...
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double stdDev = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ... (draw the weight from a gaussian of width stdDev and advance itWeight)
            }
            numInput = layer.numNodes ();
        }
    }

    // uniform "Xavier" initialization: weights in [-sqrt(2/nIn), +sqrt(2/nIn)]
    {
        int numInput = inputSize ();
        // ...
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double minVal = -sqrt (2.0/nIn);
            double maxVal =  sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ... (draw the weight uniformly from [minVal, maxVal] and advance itWeight)
            }
            numInput = layer.numNodes ();
        }
    }

    // a third initialization strategy
    {
        int numInput = inputSize ();
        // ...
        for (auto& layer: layers ())
        {
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
            }
            numInput = layer.numNodes ();
        }
    }

    // a fourth initialization strategy
    {
        int numInput = inputSize ();
        // ...
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
            }
            numInput = layer.numNodes ();
        }
    }
}
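// The first two schemes above follow the usual "Xavier" heuristic: with nIn incoming
// connections per node, gaussian weights use a width of sqrt(2/nIn) and uniform weights are
// drawn from [-sqrt(2/nIn), +sqrt(2/nIn)], keeping the variance of the summed node input
// roughly independent of the layer size.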
/*! \brief compute the error of the DNN for one pattern, given the truth values
 */
template <typename Container, typename ItWeight>
double Net::errorFunction (LayerData& layerData,
                           Container truth,
                           ItWeight itWeight,
                           ItWeight itWeightEnd,
                           double patternWeight,
                           double factorWeightDecay,
                           EnumRegularization eRegularization) const
{
    double error (0);
    switch (m_eErrorFunction)
    {
    // ... (sum-of-squares case)
    // cross-entropy error function
    {
        std::vector<double> probabilities = layerData.probabilities ();
        error = crossEntropy (begin (probabilities), end (probabilities),
                              begin (truth), end (truth),
                              // ... (deltas, inverse activation function, patternWeight)
    }
    // soft-max cross-entropy error function (mutually exclusive classes)
    {
        std::cout << "softmax." << std::endl;
        std::vector<double> probabilities = layerData.probabilities ();
        error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                     begin (truth), end (truth),
                                     // ...
    }
    }
    // ... (apply the weight decay if regularization is turned on)
    error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
    return error;
}

#endif // TMVA_NEURAL_NET_I