#ifndef TMVA_NEURAL_NET_I
#define TMVA_NEURAL_NET_I

#ifndef TMVA_NEURAL_NET
#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
#endif // TMVA_NEURAL_NET

#pragma GCC diagnostic ignored "-Wunused-variable"

template <typename T>
T uniformFromTo (T from, T to)
{
    return from + (rand () * (to - from) / RAND_MAX);
}
template <typename Container, typename T>
void uniformDouble (Container& container, T maxValue)
{
    for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
    {
        // ...
    }
}
static std::shared_ptr<std::function<double(double)>> ZeroFnc = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 0; });

static std::shared_ptr<std::function<double(double)>> Sigmoid = std::make_shared<std::function<double(double)>> ([](double value){ value = std::max (-100.0, std::min (100.0, value)); return 1.0/(1.0 + std::exp (-value)); });
static std::shared_ptr<std::function<double(double)>> InvSigmoid = std::make_shared<std::function<double(double)>> ([](double value){ double s = (*Sigmoid.get ()) (value); return s*(1.0-s); });

static std::shared_ptr<std::function<double(double)>> Tanh = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value); });
static std::shared_ptr<std::function<double(double)>> InvTanh = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 - std::pow (value, 2.0); });

static std::shared_ptr<std::function<double(double)>> Linear = std::make_shared<std::function<double(double)>> ([](double value){ return value; });
static std::shared_ptr<std::function<double(double)>> InvLinear = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 1.0; });

static std::shared_ptr<std::function<double(double)>> SymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? value-margin : value < -margin ? value+margin : 0; });
static std::shared_ptr<std::function<double(double)>> InvSymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? 1.0 : value < -margin ? 1.0 : 0; });

static std::shared_ptr<std::function<double(double)>> ReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? value-margin : 0; });
static std::shared_ptr<std::function<double(double)>> InvReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? 1.0 : 0; });

static std::shared_ptr<std::function<double(double)>> SoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return std::log (1.0 + std::exp (value)); });
static std::shared_ptr<std::function<double(double)>> InvSoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 / (1.0 + std::exp (-value)); });

static std::shared_ptr<std::function<double(double)>> TanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value-0.3); });
static std::shared_ptr<std::function<double(double)>> InvTanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return 0.3 + (1.0 - std::pow (value, 2.0)); });

static std::shared_ptr<std::function<double(double)>> SoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return value / (1.0 + fabs (value)); });
static std::shared_ptr<std::function<double(double)>> InvSoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return std::pow ((1.0 - fabs (value)), 2.0); });

static std::shared_ptr<std::function<double(double)>> Gauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return exp (-std::pow (value*s, 2.0)); });
static std::shared_ptr<std::function<double(double)>> InvGauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return -2.0 * value * s*s * (*Gauss.get ()) (value); });

static std::shared_ptr<std::function<double(double)>> GaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return 1.0 - exp (-std::pow (value*s, 2.0)); });
static std::shared_ptr<std::function<double(double)>> InvGaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return +2.0 * value * s*s * (*GaussComplement.get ()) (value); });
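// The activation functions above come in pairs: each forward function (e.g. Tanh)
// has a companion "Inv..." function that returns its derivative, used during
// back-propagation. Note that the derivative is evaluated on the already activated
// node value (see applyFunctions further below), which is why e.g. InvTanh is
// written as 1 - value^2 rather than 1 - tanh(value)^2.
//
// Illustrative sketch (not part of the original file); for a single node value:
//
//     double sum       = 0.7;                            // weighted input of a node
//     double activated = (*Tanh.get ()) (sum);           // forward: node value
//     double gradient  = (*InvTanh.get ()) (activated);  // derivative w.r.t. the weighted input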
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
    {
        for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
        {
            if (!HasDropOut || *itDrop)
                (*itTarget) += (*itSource) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
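// applyWeights implements the forward propagation of one layer: every target node
// accumulates source value times weight, roughly
//
//     target[j] += sum_i source[i] * w(i,j)
//
// The weight iterator runs over all (source, target) pairs in the order of the two
// nested loops (target index fastest). When HasDropOut is true, the drop-out
// iterator holds one flag per source node; a dropped source node contributes to
// none of the target nodes.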
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
    {
        for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
        {
            if (!HasDropOut || *itDrop)
                (*itPrev) += (*itCurr) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
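// applyWeightsBackwards is the transposed operation used during back-propagation:
// the deltas of the current layer are propagated back through the same weights and
// accumulated into the previous layer, roughly
//
//     prev[i] += sum_j curr[j] * w(i,j)
//
// The drop-out flags again refer to the previous layer's nodes: a dropped node
// neither forwards its value nor accumulates a back-propagated delta.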
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);

        ++itValue;
    }
}
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        (*itGradient) = (*invFnc.get ()) (value);

        ++itValue; ++itGradient;
    }
}
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
            ++itTargetDelta; ++itTargetGradient; ++itGradient;
        }
        ++itSource;
    }
}
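// update accumulates the (negative) gradient of the error with respect to each
// weight: for a weight connecting source node i to target node j,
//
//     gradient(i,j) -= delta_j * source_i * f'_j
//
// where delta_j is the back-propagated error of the target node and f'_j the
// derivative of its activation function (the "target gradient").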
// generic case: no regularization contribution
template <EnumRegularization Regularization>
inline double computeRegularization (double /*weight*/, const double& /*factorWeightDecay*/)
{
    return 0;
}

// specialization for L1 regularization
template <>
inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
{
    return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
}

// specialization for L2 regularization
template <>
inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
{
    return factorWeightDecay * weight;
}
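// computeRegularization returns the regularization contribution to the gradient of
// a single weight: L1 (penalty proportional to |w|) contributes
// factorWeightDecay * sign(w), and L2 (penalty proportional to w^2) contributes
// factorWeightDecay * w. The matching additions to the error value itself are made
// in weightDecay further below.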
template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient,
             ItWeight itWeight, double weightDecay)
{
    // the same gradient accumulation as above, but with the regularization term added
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight, weightDecay);
            ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
        }
        ++itSource;
    }
}
#define USELOCALWEIGHTS 1

// steepest gradient descent
template <typename Function, typename Weights, typename PassThrough>
double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
{
    size_t numWeights = weights.size ();
    // ...
    size_t currentRepetition = 0;
    // ...
    for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
    {
        (*itLocWeight) += (*itPrevG);
    }
    // ...
    double maxGrad = 0.0;
    for (; itG != itGEnd; ++itG, ++itPrevG)
    {
        double currGrad = (*itG);
        double prevGrad = (*itPrevG);
        // ...
        currGrad += prevGrad;
        // ...
        (*itPrevG) = currGrad;
        // ...
    }
    // ...
    std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
    std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
    {
        // ...
    });
    // ...
    auto itW = std::begin (weights);
    // ...
}
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
{
    double errorSum = 0.0;

    // output - truth
    ItTruth itTruth = itTruthBegin;
    bool hasDeltas = (itDelta != itDeltaEnd);
    for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
    {
        double output = (*itOutput);
        double error = output - (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
            ++itDelta;
        }
        errorSum += error*error * patternWeight;
    }

    return errorSum;
}
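// sumOfSquares: the returned error is the weighted sum of squared residuals
// (output - truth)^2 over the output nodes. If delta iterators are provided, the
// per-node delta f'(output) * (output - truth) * patternWeight is stored as the
// starting point of the back-propagation.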
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    bool hasDeltas = (itDelta != itDeltaEnd);

    double errorSum = 0.0;
    for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
    {
        double probability = *itProbability;
        double truth = *itTruthBegin;
        // map the truth onto the two target values
        truth = truth < 0.5 ? 0.1 : 0.9;
        if (hasDeltas)
        {
            double delta = probability - truth;
            (*itDelta) = delta*patternWeight;
            ++itDelta;
        }
        double error (0);
        if (probability == 0) // protection against log(0)
        {
            // ...
        }
        else if (probability == 1)
        {
            // ...
        }
        else
            error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability));
        errorSum += error * patternWeight;
    }

    return errorSum;
}
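// crossEntropy: per output node the (binary) cross entropy
// -( t*log(p) + (1-t)*log(1-p) ) is accumulated, with the truth value t mapped to
// 0.1 / 0.9 and with protection against log(0). The delta written for
// back-propagation is simply (p - t) * patternWeight, i.e. the derivative of the
// cross entropy with respect to the weighted input of a sigmoid output node.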
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    double errorSum = 0.0;

    bool hasDeltas = (itDelta != itDeltaEnd);
    // probability - truth
    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        double probability = (*itProbability);
        double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;
            ++itDelta;
        }
        double error (0);
        error += truth * log (probability);
        errorSum += error;
    }

    return -errorSum * patternWeight;
}
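// softMaxCrossEntropy: the mutually-exclusive (multi-class) variant. The error is
// -sum_k t_k * log(p_k) over the output nodes, scaled by the pattern weight, and the
// per-node delta is (p_k - t_k), i.e. the derivative of the cross entropy with
// respect to the input of a softmax output layer.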
template <typename ItWeight>
double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
{
    if (eRegularization == EnumRegularization::L1)
    {
        // L1 : error + 0.5 * factorWeightDecay * sum(|w|) / n
        double w = 0; size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += std::fabs (weight);
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else if (eRegularization == EnumRegularization::L2)
    {
        // L2 : error + 0.5 * factorWeightDecay * sum(w^2) / n
        double w = 0; size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += weight*weight;
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    return error;
}
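// weightDecay adds the regularization penalty to an already computed error value:
// 0.5 * factorWeightDecay * sum(|w|) / n for L1, and
// 0.5 * factorWeightDecay * sum(w^2) / n for L2,
// where n is the number of weights. For any other regularization setting the error
// is returned unchanged.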
template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                            currLayerData.weightsBegin (),
                            currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                            prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                             currLayerData.weightsBegin (),
                             currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                             &dummy); // no drop-out: the dummy drop iterator is never dereferenced
    }
}
template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                     currLayerData.weightsBegin (),
                                     prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                     prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                      currLayerData.weightsBegin (),
                                      prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                      &dummy); // no drop-out: the dummy drop iterator is never dereferenced
    }
}
template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
{
    // if a weight-decay factor is configured, add the regularization term to the gradients
    if (factorWeightDecay != 0.0)
    {
        if (regularization == EnumRegularization::L1)
        {
            update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else if (regularization == EnumRegularization::L2)
        {
            update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else
        {
            update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                    currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                    currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
        }
    }
    else
    {
        update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
    }
}
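// This update overload works on whole layers: it dispatches to the regularized
// gradient accumulation (L1 or L2) when a weight-decay factor is configured, and to
// the plain accumulation otherwise. It is invoked per layer and pattern during
// back-propagation.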
template <typename WeightsType, typename DropProbabilities>
void Net::dropOutWeightFactor (WeightsType& weights,
                               const DropProbabilities& drops,
                               bool inverse)
{
    if (drops.empty () || weights.empty ())
        return;

    auto itWeight = std::begin (weights);
    auto itWeightEnd = std::end (weights);
    auto itDrop = std::begin (drops);
    auto itDropEnd = std::end (drops);
    size_t numNodesPrev = inputSize ();
    double dropFractionPrev = *itDrop;
    ++itDrop;

    for (auto& layer : layers ())
    {
        if (itDrop == itDropEnd)
            break;

        size_t _numNodes = layer.numNodes ();

        double dropFraction = *itDrop;
        double pPrev = 1.0 - dropFractionPrev;
        double p = 1.0 - dropFraction;
        // ...

        size_t _numWeights = layer.numWeights (numNodesPrev);
        for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
        {
            if (itWeight == itWeightEnd)
                break;
            // ... (rescale *itWeight)
            ++itWeight;
        }
        numNodesPrev = _numNodes;
        dropFractionPrev = dropFraction;
        ++itDrop;
    }
}
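// dropOutWeightFactor rescales the weights when switching between the "drop-out"
// representation used during training (where only a fraction p = 1 - dropFraction
// of the nodes of a layer is active) and the full network used for testing and
// evaluation; with inverse=true the scaling is undone again. The loop walks through
// the layers in parallel with the weight vector and the per-layer drop fractions.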
template <typename Minimizer>
double Net::train (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer, Settings& settings)
{
    // ...
    if (fIPyMaxIter) *fIPyMaxIter = 100;

    // monitoring histograms for the train and test errors
    settings.create ("trainErrors", 100, 0, 100, 100, 0, 1);
    settings.create ("testErrors", 100, 0, 100, 100, 0, 1);

    size_t cycleCount = 0;
    size_t testCycleCount = 0;
    double testError = 1e20;
    double trainError = 1e20;
    size_t dropOutChangeCount = 0;

    DropContainer dropContainer;
    const std::vector<double>& dropFractions = settings.dropFractions ();
    bool isWeightsForDrop = false;
    bool hasConverged = false;

    // until convergence
    do
    {
        ++cycleCount;
        ++testCycleCount;
        // ...

        // if drop-out is enabled, refresh the drop-out pattern every dropRepetitions cycles
        size_t dropIndex = 0;
        if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
        {
            // fill the drop-out container (one flag per node)
            dropContainer.clear ();
            size_t _numNodes = inputSize ();
            double dropFraction = 0.0;
            dropFraction = dropFractions.at (dropIndex);
            ++dropIndex;
            fillDropContainer (dropContainer, dropFraction, _numNodes);
            for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
            {
                auto& layer = *itLayer;
                _numNodes = layer.numNodes ();
                // take the drop fraction configured for this layer, if any
                if (dropFractions.size () > dropIndex)
                    dropFraction = dropFractions.at (dropIndex);
                fillDropContainer (dropContainer, dropFraction, _numNodes);
            }
            isWeightsForDrop = true;
        }

        // execute one training cycle
        trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);

        // ------ check if a test has to be executed ------------------
        if (testCycleCount % settings.testRepetitions () == 0)
        {
            if (isWeightsForDrop)
            {
                dropOutWeightFactor (weights, dropFractions);
                isWeightsForDrop = false;
            }

            testError = 0;
            // ...
            settings.startTestCycle ();
            if (settings.useMultithreading ())
            {
                // split the test patterns into batches, one per thread
                size_t numThreads = std::thread::hardware_concurrency ();
                size_t patternPerThread = testPattern.size () / numThreads;
                std::vector<Batch> batches;
                auto itPat = testPattern.begin ();
                for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
                {
                    batches.push_back (Batch (itPat, itPat + patternPerThread));
                    itPat += patternPerThread;
                }
                if (itPat != testPattern.end ())
                    batches.push_back (Batch (itPat, testPattern.end ()));

                // one future per batch
                std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
                for (auto& batch : batches)
                {
                    futures.push_back (
                        std::async (std::launch::async, [&]()
                        {
                            std::vector<double> localOutput;
                            // ... (build the pass-through tuple "passThrough" for this batch)
                            double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
                            return std::make_tuple (testBatchError, localOutput);
                        })
                    );
                }

                auto itBatch = batches.begin ();
                for (auto& f : futures)
                {
                    std::tuple<double,std::vector<double>> result = f.get ();
                    testError += std::get<0>(result) / batches.size ();
                    std::vector<double> output = std::get<1>(result);
                    if (output.size() == (outputSize() - 1) * itBatch->size())
                    {
                        auto output_iterator = output.begin();
                        for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
                        {
                            for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                            {
                                settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                     (*pattern_it).weight ());
                                ++output_iterator;
                            }
                        }
                    }
                    ++itBatch;
                }
            }
            else
            {
                std::vector<double> output;
                // all test patterns in one batch
                Batch batch (begin (testPattern), end (testPattern));
                // ... (evaluate the net on this batch, filling "output" and "testPatternError")
                if (output.size() == (outputSize() - 1) * batch.size())
                {
                    auto output_iterator = output.begin();
                    for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
                    {
                        for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                        {
                            settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                 (*pattern_it).weight ());
                            ++output_iterator;
                        }
                    }
                }
                testError += testPatternError;
            }
            settings.endTestCycle ();

            hasConverged = settings.hasConverged (testError);

            // switch back to the drop-out representation for the next training cycles
            if (!hasConverged && !isWeightsForDrop)
            {
                dropOutWeightFactor (weights, dropFractions, true);
                isWeightsForDrop = true;
            }
        }
        ++dropOutChangeCount;

        // monitoring
        static double x = -1.0;
        // ...
        settings.addPoint ("trainErrors", cycleCount, trainError);
        settings.addPoint ("testErrors", cycleCount, testError);
        settings.plot ("trainErrors", "C", 1, kBlue);
        // ...
        if (fInteractive)
        {
            fInteractive->AddPoint (cycleCount, trainError, testError);
            if (*fExitFromTraining) break;
        }

        // ...
        if ((int)cycleCount % 10 == 0)
        {
            TString convText = Form ("(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
                                     trainError, testError, (int)cycleCount,
                                     (int)settings.convergenceCount (), (int)settings.maxConvergenceCount ());
            // ... (compute "progress")
            settings.cycle (progress, convText);
        }
        // ...
    }
    while (!hasConverged);

    TString convText = Form ("(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
    // ... (compute "progress")
    settings.cycle (progress, convText);

    return testError;
}
template <typename Iterator, typename Minimizer>
double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                        Iterator itPatternBegin, Iterator itPatternEnd,
                        Settings& settings, DropContainer& dropContainer)
{
    double error = 0.0;
    size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
    size_t numBatches = numPattern/settings.batchSize ();
    size_t numBatches_stored = numBatches;

    std::random_shuffle (itPatternBegin, itPatternEnd);
    Iterator itPatternBatchBegin = itPatternBegin;
    Iterator itPatternBatchEnd = itPatternBatchBegin;

    // create the mini-batches
    std::vector<Batch> batches;
    while (numBatches > 0)
    {
        std::advance (itPatternBatchEnd, settings.batchSize ());
        batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
        itPatternBatchBegin = itPatternBatchEnd;
        --numBatches;
    }

    // put the remaining patterns into a last batch
    if (itPatternBatchEnd != itPatternEnd)
        batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));

    if (settings.useMultithreading ())
    {
        // divide the batches into bunches, one bunch per thread
        size_t numThreads = std::thread::hardware_concurrency ();
        size_t batchesPerThread = batches.size () / numThreads;
        typedef std::vector<Batch>::iterator batch_iterator;
        std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
        batch_iterator itBatchBegin = std::begin (batches);
        batch_iterator itBatchCurrEnd = std::begin (batches);
        batch_iterator itBatchEnd = std::end (batches);
        for (size_t iT = 0; iT < numThreads; ++iT)
        {
            if (iT == numThreads-1)
                itBatchCurrEnd = itBatchEnd;
            else
                std::advance (itBatchCurrEnd, batchesPerThread);
            batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
            itBatchBegin = itBatchCurrEnd;
        }

        // loop over the batch ranges; each thread runs a clone of the minimizer
        std::vector<std::future<double>> futures;
        for (auto& batchRange : batchVec)
        {
            futures.push_back (
                std::async (std::launch::async, [&]()
                {
                    double localError = 0.0;
                    for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
                    {
                        Batch& batch = *it;
                        pass_through_type settingsAndBatch (settings, batch, dropContainer);
                        Minimizer minimizerClone (minimizer);
                        localError += minimizerClone ((*this), weights, settingsAndBatch);
                    }
                    return localError;
                })
            );
        }

        for (auto& f : futures)
            error += f.get ();
    }
    else
    {
        for (auto& batch : batches)
        {
            std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
            error += minimizer ((*this), weights, settingsAndBatch);
        }
    }

    numBatches_stored = std::max (numBatches_stored, size_t(1));
    error /= numBatches_stored;

    return error;
}
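// One training cycle (epoch): the patterns are shuffled, split into mini-batches of
// settings.batchSize() patterns, and each batch is handed to the minimizer together
// with the settings and the drop-out container. With multithreading enabled the
// batches are distributed over hardware threads, each thread working on a clone of
// the minimizer; the returned value is the batch-averaged training error.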
template <typename Weights>
std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
{
    std::vector<LayerData> layerData;
    layerData.reserve (m_layers.size ()+1);
    auto itWeight = begin (weights);
    auto itInputBegin = begin (input);
    auto itInputEnd = end (input);
    layerData.push_back (LayerData (itInputBegin, itInputEnd));
    size_t numNodesPrev = input.size ();

    // prepare one LayerData per layer; the weight iterator advances layer by layer
    for (auto& layer: m_layers)
    {
        layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                        layer.activationFunction (),
                                        layer.modeOutputValues ()));
        size_t _numWeights = layer.numWeights (numNodesPrev);
        itWeight += _numWeights;
        numNodesPrev = layer.numNodes ();
    }

    // forward propagation
    forwardPattern (m_layers, layerData);

    // fetch the output of the last layer
    std::vector<double> output;
    fetchOutput (layerData.back (), output);
    return output;
}
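// Usage sketch (illustrative, not part of the original file): evaluating a
// configured network `net` with a trained weight vector `weights` on one input
// pattern could look like
//
//     std::vector<double> input = /* one pattern, inputSize() values */ {};
//     std::vector<double> prediction = net.compute (input, weights);
//
// compute() performs a pure forward pass: no deltas, no gradients, no drop-out.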
template <typename Weights, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
{
    // forward only: no gradients, no output fetching
    std::vector<double> nothing; // empty gradient container: no back-propagation
    assert (numWeights () == weights.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false);
    return error;
}

template <typename Weights, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const
{
    // forward only, but fetch the output values into outputContainer
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
    return error;
}

template <typename Weights, typename Gradients, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
{
    // forward and backward: the gradients are computed as well
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
    return error;
}

template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
{
    // forward and backward, and additionally fetch the output values
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
    return error;
}
template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
                                                           Batch& batch,
                                                           const DropContainer& dropContainer,
                                                           ItWeight itWeightBegin,
                                                           ItWeight /*itWeightEnd*/,
                                                           ItGradient itGradientBegin,
                                                           ItGradient itGradientEnd,
                                                           size_t& totalNumWeights) const
{
    bool usesDropOut = !dropContainer.empty ();
    const_dropout_iterator itDropOut;
    if (usesDropOut)
        itDropOut = std::begin (dropContainer);

    if (_layers.empty ())
        throw std::string ("no layers in this net");

    // ...
    totalNumWeights = 0;
    size_t totalNumNodes = 0;
    std::vector<std::vector<LayerData>> layerPatternData;
    layerPatternData.reserve (_layers.size ()+1);
    ItWeight itWeight = itWeightBegin;
    ItGradient itGradient = itGradientBegin;
    size_t numNodesPrev = inputSize ();
    // ...

    // --------------------- layer data for the input layer ----------------------
    layerPatternData.push_back (std::vector<LayerData>());
    for (const Pattern& _pattern : batch)
    {
        std::vector<LayerData>& layerData = layerPatternData.back ();
        layerData.push_back (LayerData (numNodesPrev));

        auto itInputBegin = _pattern.beginInput ();
        auto itInputEnd = _pattern.endInput ();
        layerData.back ().setInput (itInputBegin, itInputEnd);

        if (usesDropOut)
            layerData.back ().setDropOut (itDropOut);
    }
    if (usesDropOut)
        itDropOut += _layers.back ().numNodes ();

    // --------------------- layer data for the subsequent layers ----------------
    for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
    {
        bool isOutputLayer = (itLayer+1 == itLayerEnd);
        bool isFirstHiddenLayer = (itLayer == begin (_layers));

        auto& layer = *itLayer;
        layerPatternData.push_back (std::vector<LayerData>());
        for (const Pattern& _pattern : batch)
        {
            std::vector<LayerData>& layerData = layerPatternData.back ();
            // ...
            if (itGradientBegin == itGradientEnd)
            {
                // no gradients requested: forward-only evaluation
                layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                                layer.activationFunction (),
                                                layer.modeOutputValues ()));
            }
            else
            {
                // training: also keep gradients and the inverse activation function
                layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
                                                layer.activationFunction (),
                                                layer.inverseActivationFunction (),
                                                layer.modeOutputValues ()));
            }

            if (usesDropOut)
                layerData.back ().setDropOut (itDropOut);
        }

        if (usesDropOut)
            itDropOut += layer.numNodes ();

        size_t _numWeights = layer.numWeights (numNodesPrev);
        totalNumWeights += _numWeights;
        itWeight += _numWeights;
        itGradient += _numWeights;
        numNodesPrev = layer.numNodes ();
        totalNumNodes += numNodesPrev;
    }
    assert (totalNumWeights > 0);
    return layerPatternData;
}
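// prepareLayerData builds the working structure for one mini-batch:
// layerPatternData[layer][pattern] holds the LayerData of one layer for one pattern,
// with index 0 being the input layer. When gradient iterators are supplied, the
// LayerData objects additionally carry gradient storage and the inverse activation
// functions needed for training; otherwise a cheaper forward-only variant is used.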
template <typename LayerContainer>
void Net::forwardPattern (const LayerContainer& _layers,
                          std::vector<LayerData>& layerData) const
{
    size_t idxLayer = 0, idxLayerEnd = _layers.size ();
    size_t cumulativeNodeCount = 0;
    for (; idxLayer < idxLayerEnd; ++idxLayer)
    {
        LayerData& prevLayerData = layerData.at (idxLayer);
        LayerData& currLayerData = layerData.at (idxLayer+1);

        forward (prevLayerData, currLayerData);
        applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
    }
}
template <typename LayerContainer, typename LayerPatternContainer>
void Net::forwardBatch (const LayerContainer& _layers,
                        LayerPatternContainer& layerPatternData,
                        std::vector<double>& valuesMean,
                        std::vector<double>& valuesStdDev,
                        size_t trainFromLayer) const
{
    valuesMean.clear ();
    valuesStdDev.clear ();

    // loop over the layer pairs (previous, current)
    size_t cumulativeNodeCount = 0;
    for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
    {
        bool doTraining = idxLayer >= trainFromLayer;

        // layer data of the previous and the current layer
        std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
        std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);

        size_t numPattern = prevLayerPatternData.size ();
        size_t numNodesLayer = _layers.at (idxLayer).numNodes ();

        std::vector<MeanVariance> means (numNodesLayer);

        // propagate each pattern of the batch through this layer
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            forward (prevLayerData, currLayerData); // apply the weights
        }

        // apply the activation functions (and fill the gradients when training)
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);
            if (doTraining)
                applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
                                currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
            else
                applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
        }

        cumulativeNodeCount += numNodesLayer;
    }
}
template <typename OutputContainer>
void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
{
    if (isFlagSet (ModeOutputValues::DIRECT, lastLayerData.outputMode ()))
    {
        // direct output: copy the node values as they are
        outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
    }
    else
    {
        // sigmoid or softmax output: convert the node values to probabilities first
        auto prob = lastLayerData.probabilities ();
        outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ());
    }
}

template <typename OutputContainer>
void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
{
    for (const LayerData& lastLayerData : lastLayerPatternData)
        fetchOutput (lastLayerData, outputContainer);
}
template <typename ItWeight>
std::tuple<double,double> Net::computeError (const Settings& settings,
                                             std::vector<LayerData>& lastLayerData,
                                             Batch& batch,
                                             ItWeight itWeightBegin,
                                             ItWeight itWeightEnd) const
{
    typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();

    typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
    typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();

    double sumWeights (0.0);
    double sumError (0.0);

    size_t idxPattern = 0;
    // loop over the patterns of the batch and accumulate the error
    for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
    {
        LayerData& layerData = (*itLayerData);
        const Pattern& _pattern = (*itPattern);
        double error = errorFunction (layerData, _pattern.output (),
                                      itWeightBegin, itWeightEnd,
                                      _pattern.weight (), settings.factorWeightDecay (),
                                      settings.regularization ());
        sumWeights += _pattern.weight ();
        sumError += error;
    }
    return std::make_tuple (sumError, sumWeights);
}
template <typename Settings>
void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                         const Settings& settings,
                         size_t trainFromLayer,
                         size_t totalNumWeights) const
{
    bool doTraining = layerPatternData.size () > trainFromLayer;
    if (!doTraining)
        return;

    // walk backwards through the layers
    size_t idxLayer = layerPatternData.size ();
    for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
         itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
    {
        --idxLayer;
        if (idxLayer <= trainFromLayer) // no further training requested
            break;

        std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
        std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);

        size_t idxPattern = 0;
        for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
                 itPrevLayerData = begin (prevLayerDataColl);
             itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
        {
            LayerData& currLayerData = (*itCurrLayerData);
            LayerData& prevLayerData = *(itPrevLayerData);

            backward (prevLayerData, currLayerData);

            // the weight-decay factor is scaled by 1/n, n being the total number of weights
            update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
        }
    }
}
template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
                              ItWeight itWeightBegin, ItWeight itWeightEnd,
                              ItGradient itGradientBegin, ItGradient itGradientEnd,
                              size_t trainFromLayer,
                              OutContainer& outputContainer, bool doFetchOutput) const
{
    Settings& settings = std::get<0>(settingsAndBatch);
    Batch& batch = std::get<1>(settingsAndBatch);
    DropContainer& dropContainer = std::get<2>(settingsAndBatch);

    double sumError = 0.0;
    double sumWeights = 0.0;

    // prepare the layer data (one entry per layer and per pattern of the batch)
    size_t totalNumWeights (0);
    std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
                                                                             batch,
                                                                             dropContainer,
                                                                             itWeightBegin,
                                                                             itWeightEnd,
                                                                             itGradientBegin,
                                                                             itGradientEnd,
                                                                             totalNumWeights);

    // forward propagation
    std::vector<double> valuesMean;
    std::vector<double> valuesStdDev;
    forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);

    // fetch the output values of the last layer if requested
    if (doFetchOutput)
        fetchOutput (layerPatternData.back (), outputContainer);

    // compute the error of the DNN on this batch
    std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);

    // backward propagation
    backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);

    // normalize the gradients by the batch size
    double batchSize = std::distance (std::begin (batch), std::end (batch));
    for (auto it = itGradientBegin; it != itGradientEnd; ++it)
        (*it) /= batchSize;

    sumError /= sumWeights;
    return sumError;
}
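// forward_backward is the central entry point used by all Net::operator() overloads:
// it prepares the per-layer/per-pattern data, runs the forward pass, optionally
// fetches the output values, computes the error of the batch, back-propagates where
// training is requested (trainFromLayer below the number of layers) and finally
// normalizes the accumulated gradients by the batch size.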
template <typename OutIterator>
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
{
    if (eInitStrategy == WeightInitializationStrategy::XAVIER)
    {
        // gaussian initialization with sigma = sqrt(2/nIn)
        int numInput = inputSize ();
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double stdDev = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = gaussDouble (0.0, stdDev);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
    {
        // uniform initialization in [-sqrt(2/nIn), +sqrt(2/nIn)]
        int numInput = inputSize ();
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double minVal = -sqrt (2.0/nIn);
            double maxVal =  sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = uniformDouble (minVal, maxVal);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::TEST)
    {
        // fixed test initialization
        int numInput = inputSize ();
        for (auto& layer: layers ())
        {
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
    {
        // initialization scaled with the layer size
        int numInput = inputSize ();
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }
}
template <typename Container, typename ItWeight>
double Net::errorFunction (LayerData& layerData,
                           Container truth,
                           ItWeight itWeight,
                           ItWeight itWeightEnd,
                           double patternWeight,
                           double factorWeightDecay,
                           EnumRegularization eRegularization) const
{
    double error (0);
    switch (m_eErrorFunction)
    {
    // ... (case for the sum-of-squares error function)
    case ModeErrorFunction::CROSSENTROPY:
    {
        std::vector<double> probabilities = layerData.probabilities ();
        error = crossEntropy (begin (probabilities), end (probabilities),
                              begin (truth), end (truth),
                              layerData.deltasBegin (), layerData.deltasEnd (),
                              layerData.inverseActivationFunction (),
                              patternWeight);
        break;
    }
    case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
    {
        std::cout << "softmax." << std::endl;
        std::vector<double> probabilities = layerData.probabilities ();
        error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                     begin (truth), end (truth),
                                     layerData.deltasBegin (), layerData.deltasEnd (),
                                     layerData.inverseActivationFunction (),
                                     patternWeight);
        break;
    }
    }
    // add the weight decay (L1 or L2 regularization), if configured
    error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
    return error;
}

#endif // TMVA_NEURAL_NET_I