#ifndef TMVA_NEURAL_NET_I
#define TMVA_NEURAL_NET_I

#ifndef TMVA_NEURAL_NET
#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
#endif

#pragma GCC diagnostic ignored "-Wunused-variable"


template <typename T>
T uniformFromTo (T from, T to)
{
    return from + (rand () * (to - from) / RAND_MAX);
}

template <typename Container, typename T>
void uniformDouble (Container& container, T maxValue)
{
    for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
    {
        // ...
    }
}
extern std::shared_ptr<std::function<double(double)>> ZeroFnc;
extern std::shared_ptr<std::function<double(double)>> Sigmoid;
extern std::shared_ptr<std::function<double(double)>> Tanh;
extern std::shared_ptr<std::function<double(double)>> InvTanh;
extern std::shared_ptr<std::function<double(double)>> Linear;
extern std::shared_ptr<std::function<double(double)>> ReLU;
extern std::shared_ptr<std::function<double(double)>> InvReLU;
extern std::shared_ptr<std::function<double(double)>> Gauss;
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
    {
        for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
        {
            if (!HasDropOut || *itDrop)
                (*itTarget) += (*itSource) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
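
// Illustrative sketch (not part of the original TMVA code): the nested loops above implement a
// fully connected forward pass, target[t] += source[s] * W[s*numTargets + t], i.e. the weights
// are stored with the target index running fastest.  Hypothetical helper; assumes <vector> and <cstddef>.
inline std::vector<double> denseForwardSketch (const std::vector<double>& source,
                                               const std::vector<double>& weights,
                                               std::size_t numTargets)
{
    std::vector<double> target (numTargets, 0.0);
    std::size_t iWeight = 0;
    for (double s : source)                              // loop over the source nodes
        for (std::size_t t = 0; t < numTargets; ++t)     // loop over the target nodes
            target[t] += s * weights[iWeight++];         // same accumulation as applyWeights
    return target;
}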
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
    {
        for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
        {
            if (!HasDropOut || *itDrop)
                (*itPrev) += (*itCurr) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
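
// Illustrative sketch (not part of the original TMVA code): the backward application uses the
// same weight layout to push the deltas of the current layer back to the previous one,
// prevDelta[p] += currDelta[c] * W[p*numCurr + c].  Hypothetical helper; assumes <vector> and <cstddef>.
inline std::vector<double> denseBackwardSketch (const std::vector<double>& currDelta,
                                                const std::vector<double>& weights,
                                                std::size_t numPrev)
{
    std::vector<double> prevDelta (numPrev, 0.0);
    std::size_t iWeight = 0;
    for (std::size_t p = 0; p < numPrev; ++p)
        for (std::size_t c = 0; c < currDelta.size (); ++c)
            prevDelta[p] += currDelta[c] * weights[iWeight++];
    return prevDelta;
}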
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);

        ++itValue;
    }
}
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        (*itGradient) = (*invFnc.get ()) (value);

        ++itValue; ++itGradient;
    }
}
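
// Illustrative sketch (not part of the original TMVA code): the "inverse" functions used here
// are the derivatives of the activations expressed through the already-activated value, e.g.
// d/dx tanh(x) = 1 - tanh(x)^2 can be computed from the stored node value y = tanh(x).
// Hypothetical helper; assumes <cmath>.
inline void tanhWithGradientSketch (double& value, double& gradient)
{
    value = std::tanh (value);        // what fnc does to the node value
    gradient = 1.0 - value * value;   // what invFnc computes from the activated value
}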
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
            ++itTargetDelta; ++itTargetGradient; ++itGradient;
        }
        ++itSource;
    }
}
template <EnumRegularization Regularization>
inline double computeRegularization (double weight, const double& factorWeightDecay)
{
    // ... (generic case: no regularization contribution)
}

// L1 regularization: derivative of factorWeightDecay * |w|
template <>
inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
{
    return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
}

// L2 regularization: derivative of 0.5 * factorWeightDecay * w^2
template <>
inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
{
    return factorWeightDecay * weight;
}
template <EnumRegularization Regularization,
          typename ItSource,
          typename ItDelta,
          typename ItTargetGradient,
          typename ItGradient,
          typename ItWeight>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient,
             ItWeight itWeight,
             double weightDecay)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient)
                             + computeRegularization<Regularization>(*itWeight, weightDecay);
            ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
        }
        ++itSource;
    }
}
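
// Illustrative sketch (not part of the original TMVA code): for a single weight w connecting a
// source value x to a target node with back-propagated delta d and activation gradient g, the
// contribution accumulated above is  d * x * g + r(w),  where r(w) is the derivative of the
// regularization term: lambda*sign(w) for L1, lambda*w for L2.  Hypothetical helper; assumes <cmath>.
inline double gradientContributionSketch (double x, double d, double g,
                                          double w, double lambda, bool useL2)
{
    double r = useL2 ? lambda * w
                     : (w == 0.0 ? 0.0 : std::copysign (lambda, w));
    return d * x * g + r;   // the caller subtracts this from the accumulated gradient
}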
#define USELOCALWEIGHTS 1
template <typename Function, typename Weights, typename PassThrough>
double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
{
    size_t numWeights = weights.size ();
    // ...
    size_t currentRepetition = 0;
    // ...
    {
        // ...
        for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
        {
            // ...
            (*itLocWeight) += (*itPrevG);
        }
        // ...

        double maxGrad = 0.0;
        for (; itG != itGEnd; ++itG, ++itPrevG)
        {
            double currGrad = (*itG);
            double prevGrad = (*itPrevG);
            // ...
            currGrad += prevGrad;
            // ...
            (*itPrevG) = currGrad;
            // ...
            if (std::fabs (currGrad) > maxGrad)
                maxGrad = currGrad;
        }

        // if the gradients grow too large, reduce the learning rate
        // ...
        {
            // ...
            std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
            std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
                           {
                               // ...
                           });
            // ...
        }
        // ...
        {
            auto itW = std::begin (weights);
            // ...
        }
        // ...
    }
    // ...
}
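
// Illustrative sketch (not part of the original TMVA code): the core of a steepest-descent step
// with momentum, using a learning rate alpha and a momentum term beta as in the m_alpha / m_beta
// members above.  Note that the surrounding code accumulates the gradients with a negative sign,
// so there the step is added directly; this standalone helper uses the conventional sign.
// Hypothetical names; assumes <vector> and <cstddef>.
inline void steepestStepSketch (std::vector<double>& weights,
                                const std::vector<double>& gradients,
                                std::vector<double>& prevSteps,
                                double alpha, double beta)
{
    for (std::size_t i = 0; i < weights.size (); ++i)
    {
        double step = beta * prevSteps[i] - alpha * gradients[i];
        weights[i] += step;
        prevSteps[i] = step;
    }
}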
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd,
                     ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                     ItDelta itDelta, ItDelta itDeltaEnd,
                     InvFnc invFnc,
                     double patternWeight)
{
    double errorSum = 0.0;

    // compute the error and the deltas of the output layer
    ItTruth itTruth = itTruthBegin;
    bool hasDeltas = (itDelta != itDeltaEnd);
    for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
    {
        // ...
        double output = (*itOutput);
        double error = output - (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
            ++itDelta;
        }
        errorSum += error * error * patternWeight;
    }

    return errorSum;
}
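
// Illustrative sketch (not part of the original TMVA code): sum-of-squares error for one pattern
// with a linear output layer (derivative of the activation == 1).  For outputs {0.8, 0.2},
// truths {1.0, 0.0} and pattern weight 1 this yields an error of 0.08 and deltas {-0.2, 0.2}.
// Hypothetical helper; assumes <vector> and <cstddef>.
inline double sumOfSquaresSketch (const std::vector<double>& outputs,
                                  const std::vector<double>& truths,
                                  std::vector<double>& deltas,
                                  double patternWeight)
{
    double errorSum = 0.0;
    deltas.resize (outputs.size ());
    for (std::size_t i = 0; i < outputs.size (); ++i)
    {
        double error = outputs[i] - truths[i];
        deltas[i] = error * patternWeight;            // inverse activation of a linear output is 1
        errorSum += error * error * patternWeight;
    }
    return errorSum;
}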
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd,
                     ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                     ItDelta itDelta, ItDelta itDeltaEnd,
                     ItInvActFnc /*itInvActFnc*/,
                     double patternWeight)
{
    bool hasDeltas = (itDelta != itDeltaEnd);

    double errorSum = 0.0;
    for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
    {
        double probability = *itProbability;
        double truth = *itTruthBegin;
        // ...
        truth = truth < 0.5 ? 0.1 : 0.9;
        if (hasDeltas)
        {
            double delta = probability - truth;
            (*itDelta) = delta * patternWeight;
            ++itDelta;
        }
        double error (0);
        if (probability == 0) // protection against log (0)
        {
            // ...
        }
        else if (probability == 1) // protection against log (0) in the second term
        {
            // ...
        }
        else
            error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability)); // cross entropy
        errorSum += error * patternWeight;
    }

    return errorSum;
}
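
// Illustrative sketch (not part of the original TMVA code): binary cross entropy for a single
// sigmoid output, error = -(t*log(p) + (1-t)*log(1-p)) with delta p - t.  The mapping of the
// truth to {0.1, 0.9} in the code above is a label-softening choice of the original
// implementation.  Hypothetical helper; assumes <cmath> and <algorithm>.
inline double crossEntropySketch (double p, double t, double& delta)
{
    delta = p - t;                                     // gradient w.r.t. the sigmoid output node
    const double eps = 1e-12;                          // protection against log(0)
    p = std::min (std::max (p, eps), 1.0 - eps);
    return -(t * std::log (p) + (1.0 - t) * std::log (1.0 - p));
}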
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd,
                            ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                            ItDelta itDelta, ItDelta itDeltaEnd,
                            ItInvActFnc /*itInvActFnc*/,
                            double patternWeight)
{
    double errorSum = 0.0;

    bool hasDeltas = (itDelta != itDeltaEnd);
    // output - truth
    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        // ...
        double probability = (*itProbability);
        double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;
            ++itDelta;
        }
        double error (0);
        // ...
        error += truth * log (probability);
        errorSum += error;
    }

    return -errorSum * patternWeight;
}
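
// Illustrative sketch (not part of the original TMVA code): for mutually exclusive classes the
// probabilities come from a softmax, p_k = exp(o_k)/sum_j exp(o_j); the error is
// -sum_k t_k*log(p_k) and the delta of each output simplifies to p_k - t_k.
// Hypothetical helper; assumes <vector>, <cmath>, <algorithm> and <cstddef>.
inline double softMaxCrossEntropySketch (const std::vector<double>& outputs,
                                         const std::vector<double>& truths,
                                         std::vector<double>& deltas)
{
    std::vector<double> p (outputs.size ());
    double maxOutput = *std::max_element (outputs.begin (), outputs.end ());
    double norm = 0.0;
    for (std::size_t k = 0; k < outputs.size (); ++k)
        norm += (p[k] = std::exp (outputs[k] - maxOutput));  // subtract the maximum for numerical stability
    double error = 0.0;
    deltas.resize (outputs.size ());
    for (std::size_t k = 0; k < outputs.size (); ++k)
    {
        p[k] /= norm;
        deltas[k] = p[k] - truths[k];
        error -= truths[k] * std::log (p[k]);
    }
    return error;
}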
template <typename ItWeight>
double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
{
    if (eRegularization == EnumRegularization::L1)
    {
        // weight decay (regularization): sum of |w|
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += std::fabs (weight);
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else if (eRegularization == EnumRegularization::L2)
    {
        // weight decay (regularization): sum of w^2
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += weight * weight;
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else
        return error;
}
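
// Illustrative worked example (not part of the original TMVA code): for three weights
// {0.5, -1.0, 2.0} and factorWeightDecay = 0.1, the L2 penalty added to the error is
// 0.5 * (0.25 + 1.0 + 4.0) * 0.1 / 3 = 0.0875.  Hypothetical helper; assumes <vector>.
inline double weightDecayL2Example ()
{
    std::vector<double> weights { 0.5, -1.0, 2.0 };
    double w = 0.0;
    for (double weight : weights)
        w += weight * weight;                          // 5.25
    return 0.5 * w * 0.1 / weights.size ();            // 0.0875
}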
template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                            currLayerData.weightsBegin (),
                            currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                            prevLayerData.dropOut ());
    }
    else
    {
        applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                             currLayerData.weightsBegin (),
                             currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                             /* ... */); // no drop-out iterator needed
    }
}
template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                     currLayerData.weightsBegin (),
                                     prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                     prevLayerData.dropOut ());
    }
    else
    {
        applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                      currLayerData.weightsBegin (),
                                      prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                      /* ... */); // no drop-out iterator needed
    }
}
template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData,
             double factorWeightDecay, EnumRegularization regularization)
{
    // ...
    if (factorWeightDecay != 0.0) // has weight regularization
    {
        if (regularization == EnumRegularization::L1) // L1 regularization (sum of |w|)
        {
            update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else if (regularization == EnumRegularization::L2) // L2 regularization (sum of w^2)
        {
            update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else
        {
            update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                    currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                    currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
        }
    }
    else
    {
        update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
    }
}
template <typename WeightsType, typename DropProbabilities>
void Net::dropOutWeightFactor (WeightsType& weights,
                               const DropProbabilities& drops,
                               bool inverse)
{
    if (drops.empty () || weights.empty ())
        return;

    auto itWeight = std::begin (weights);
    auto itWeightEnd = std::end (weights);
    auto itDrop = std::begin (drops);
    auto itDropEnd = std::end (drops);
    size_t numNodesPrev = inputSize ();
    double dropFractionPrev = *itDrop;
    ++itDrop;

    for (auto& layer : layers ())
    {
        if (itDrop == itDropEnd)
            break;

        size_t _numNodes = layer.numNodes ();

        double dropFraction = *itDrop;
        double pPrev = 1.0 - dropFractionPrev;
        double p = 1.0 - dropFraction;
        // ... (combine the keep probabilities; invert the factor if requested)

        size_t _numWeights = layer.numWeights (numNodesPrev);
        for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
        {
            if (itWeight == itWeightEnd)
                break;
            // ... (scale the weight and advance the iterator)
        }
        numNodesPrev = _numNodes;
        dropFractionPrev = dropFraction;
        ++itDrop;
    }
}
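
// Illustrative sketch (not part of the original TMVA code): when drop-out is active during
// training, the weights feeding a layer are rescaled by the keep probability p = 1 - dropFraction
// before the net is evaluated on test data (and by 1/p to undo the scaling afterwards), so the
// expected activations stay comparable.  Hypothetical helper; assumes <vector>.
inline void scaleWeightsForDropOutSketch (std::vector<double>& layerWeights,
                                          double dropFraction, bool inverse)
{
    double p = 1.0 - dropFraction;
    if (inverse)
        p = 1.0 / p;
    for (double& w : layerWeights)
        w *= p;
}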
template <typename Minimizer>
double Net::train (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer,
                   Settings& settings)
{
    // ...
    settings.create ("trainErrors", 100, 0, 100, 100, 0, 1);
    settings.create ("testErrors", 100, 0, 100, 100, 0, 1);

    size_t cycleCount = 0;
    size_t testCycleCount = 0;
    double testError = 1e20;
    double trainError = 1e20;
    size_t dropOutChangeCount = 0;

    DropContainer dropContainer;
    // ...
    const std::vector<double>& dropFractions = settings.dropFractions ();
    bool isWeightsForDrop = false;

    // train until convergence
    do
    {
        // ...

        // if drop-out is enabled, (re)fill the drop-out container every "dropRepetitions" cycles
        size_t dropIndex = 0;
        if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
        {
            // fill the drop-out container
            dropContainer.clear ();
            // ...
            double dropFraction = 0.0;
            dropFraction = dropFractions.at (dropIndex);
            // ...
            for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
            {
                auto& layer = *itLayer;
                _numNodes = layer.numNodes ();
                // how many nodes have to be dropped
                if (dropFractions.size () > dropIndex)
                    dropFraction = dropFractions.at (dropIndex);
                // ...
            }
            isWeightsForDrop = true;
        }

        // execute one training cycle
        trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);

        // ------ check whether the test data has to be evaluated ------------------
        bool hasConverged = false;
        // ... (the test is executed only every "testRepetitions" cycles)
        {
            if (isWeightsForDrop)
            {
                // ...
                isWeightsForDrop = false;
            }
            // ...

            if (settings.useMultithreading ())
            {
                // split the test patterns into batches, one per thread
                size_t numThreads = std::thread::hardware_concurrency ();
                size_t patternPerThread = testPattern.size () / numThreads;
                std::vector<Batch> batches;
                auto itPat = testPattern.begin ();
                // ...
                for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
                {
                    batches.push_back (Batch (itPat, itPat + patternPerThread));
                    itPat += patternPerThread;
                }
                if (itPat != testPattern.end ())
                    batches.push_back (Batch (itPat, testPattern.end ()));

                // evaluate each batch on its own thread
                std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
                for (auto& batch : batches)
                {
                    // ...
                    futures.push_back (
                        std::async (std::launch::async, [&]()
                                    {
                                        std::vector<double> localOutput;
                                        // ...
                                        double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
                                        return std::make_tuple (testBatchError, localOutput);
                                    }));
                }

                auto itBatch = batches.begin ();
                for (auto& f : futures)
                {
                    std::tuple<double,std::vector<double>> result = f.get ();
                    testError += std::get<0>(result) / batches.size ();
                    std::vector<double> output = std::get<1>(result);
                    // ...
                    auto output_iterator = output.begin();
                    for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
                    {
                        for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                        {
                            settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                 (*pattern_it).weight ());
                            // ...
                        }
                        // ...
                    }
                    // ...
                }
            }
            else
            {
                std::vector<double> output;
                // ...
                Batch batch (begin (testPattern), end (testPattern));
                // ...
                auto output_iterator = output.begin();
                for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
                {
                    for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                    {
                        settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                             (*pattern_it).weight ());
                        // ...
                    }
                    // ...
                }
                testError += testPatternError;
            }
            // ...
        }

        // ...
        if (!hasConverged && !isWeightsForDrop)
        {
            // ...
            isWeightsForDrop = true;
        }
        // ...
        ++dropOutChangeCount;

        // monitoring
        settings.addPoint ("trainErrors", cycleCount, trainError);
        settings.addPoint ("testErrors", cycleCount, testError);
        settings.plot ("trainErrors", "C", 1, kBlue);
        // ...

        if ((int)cycleCount % 10 == 0) {
            TString convText = Form ("(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
                                     /* ... */);
            // ...
            settings.cycle (progress, convText);
        }
        // ...
    }
    while (/* ... */);
    // ...

    TString convText = Form ("(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
    // ...
    settings.cycle (progress, convText);
    // ...
}
template <typename Iterator, typename Minimizer>
double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                        Iterator itPatternBegin, Iterator itPatternEnd,
                        Settings& settings, DropContainer& dropContainer)
{
    double error = 0.0;
    size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
    size_t numBatches = numPattern / settings.batchSize ();
    size_t numBatches_stored = numBatches;

    std::shuffle (itPatternBegin, itPatternEnd, std::default_random_engine{});
    Iterator itPatternBatchBegin = itPatternBegin;
    Iterator itPatternBatchEnd = itPatternBatchBegin;

    // create the batches
    std::vector<Batch> batches;
    while (numBatches > 0)
    {
        std::advance (itPatternBatchEnd, settings.batchSize ());
        batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
        itPatternBatchBegin = itPatternBatchEnd;
        --numBatches;
    }

    // add the remaining patterns to the last batch
    if (itPatternBatchEnd != itPatternEnd)
        batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));

    // ...
    if (settings.useMultithreading ())
    {
        // split the batches into ranges, one per thread
        size_t numThreads = std::thread::hardware_concurrency ();
        size_t batchesPerThread = batches.size () / numThreads;
        typedef std::vector<Batch>::iterator batch_iterator;
        std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
        batch_iterator itBatchBegin = std::begin (batches);
        batch_iterator itBatchCurrEnd = std::begin (batches);
        batch_iterator itBatchEnd = std::end (batches);
        for (size_t iT = 0; iT < numThreads; ++iT)
        {
            if (iT == numThreads-1)
                itBatchCurrEnd = itBatchEnd;
            else
                std::advance (itBatchCurrEnd, batchesPerThread);
            batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
            itBatchBegin = itBatchCurrEnd;
        }

        // execute each batch range on its own thread
        std::vector<std::future<double>> futures;
        for (auto& batchRange : batchVec)
        {
            futures.push_back (
                std::async (std::launch::async, [&]()
                            {
                                double localError = 0.0;
                                for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
                                {
                                    // ...
                                    Minimizer minimizerClone (minimizer);
                                    localError += minimizerClone ((*this), weights, settingsAndBatch);
                                }
                                return localError;
                            }));
        }

        for (auto& f : futures)
            error += f.get ();
    }
    else
    {
        for (auto& batch : batches)
        {
            std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
            error += minimizer ((*this), weights, settingsAndBatch);
        }
    }

    numBatches_stored = std::max (numBatches_stored, size_t(1));
    error /= numBatches_stored;
    // ...

    return error;
}
template <typename Weights>
std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
{
    std::vector<LayerData> layerData;
    layerData.reserve (m_layers.size ()+1);
    auto itWeight = begin (weights);
    auto itInputBegin = begin (input);
    auto itInputEnd = end (input);
    layerData.push_back (LayerData (itInputBegin, itInputEnd));
    size_t numNodesPrev = input.size ();

    // prepare one LayerData per layer; the weight iterator advances layer by layer
    for (auto& layer : m_layers)
    {
        layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                        layer.activationFunction (),
                                        layer.modeOutputValues ()));
        size_t _numWeights = layer.numWeights (numNodesPrev);
        itWeight += _numWeights;
        numNodesPrev = layer.numNodes ();
    }

    // forward the pattern through the net
    forwardPattern (m_layers, layerData);

    // fetch the output of the last layer
    std::vector<double> output;
    fetchOutput (layerData.back (), output);
    return output;
}
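
// Illustrative usage sketch (not part of the original TMVA code): once a weight vector has been
// trained, Net::compute evaluates the net on a single input pattern; no gradients are computed.
// "net", "weights" and "input" are hypothetical names.
inline std::vector<double> evaluateNetSketch (const Net& net,
                                              const std::vector<double>& weights,
                                              const std::vector<double>& input)
{
    return net.compute (input, weights);   // forward pass plus fetch of the output values
}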
template <typename Weights, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
{
    std::vector<double> nothing; // empty gradients: no back-propagation
    // ...
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (nothing), std::end (nothing),
                                     10000, nothing, false); // trainFromLayer = 10000: evaluation only
    return error;
}
template <typename Weights, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights,
                        ModeOutput /*eFetch*/, OutContainer& outputContainer) const
{
    std::vector<double> nothing; // empty gradients: no back-propagation
    // ...
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (nothing), std::end (nothing),
                                     10000, outputContainer, true);
    return error;
}
template <typename Weights, typename Gradients, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
{
    std::vector<double> nothing;
    // ...
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients),
                                     0, nothing, false);
    return error;
}
template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients,
                        ModeOutput eFetch, OutContainer& outputContainer) const
{
    // ...
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients),
                                     0, outputContainer, true);
    return error;
}
template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
                                                           Batch& batch,
                                                           const DropContainer& dropContainer,
                                                           ItWeight itWeightBegin,
                                                           ItWeight /*itWeightEnd*/,
                                                           ItGradient itGradientBegin,
                                                           ItGradient itGradientEnd,
                                                           size_t& totalNumWeights) const
{
    // ...
    bool usesDropOut = !dropContainer.empty ();
    DropContainer::const_iterator itDropOut;
    if (usesDropOut)
        itDropOut = std::begin (dropContainer);

    if (_layers.empty ())
        throw std::string ("no layers in this net");

    // ...
    totalNumWeights = 0;
    std::vector<std::vector<LayerData>> layerPatternData;
    layerPatternData.reserve (_layers.size ()+1);
    ItWeight itWeight = itWeightBegin;
    ItGradient itGradient = itGradientBegin;
    // ...

    // --------------------- prepare the layer data for the input layer ----------------------------
    layerPatternData.push_back (std::vector<LayerData>());
    for (const Pattern& _pattern : batch)
    {
        std::vector<LayerData>& layerData = layerPatternData.back ();
        layerData.push_back (LayerData (numNodesPrev));

        itInputBegin = _pattern.beginInput ();
        itInputEnd = _pattern.endInput ();
        layerData.back ().setInput (itInputBegin, itInputEnd);

        if (usesDropOut)
            layerData.back ().setDropOut (itDropOut);
    }

    if (usesDropOut)
        itDropOut += _layers.back ().numNodes ();

    // --------------------- prepare the layer data for the subsequent layers ----------------------
    for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
    {
        bool isOutputLayer = (itLayer+1 == itLayerEnd);
        bool isFirstHiddenLayer = (itLayer == begin (_layers));

        auto& layer = *itLayer;
        layerPatternData.push_back (std::vector<LayerData>());
        // for each pattern, prepare one LayerData
        for (const Pattern& _pattern : batch)
        {
            std::vector<LayerData>& layerData = layerPatternData.back ();
            // ...
            if (itGradientBegin == itGradientEnd)
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                                layer.activationFunction (),
                                                layer.modeOutputValues ()));
            }
            else
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
                                                layer.activationFunction (),
                                                layer.inverseActivationFunction (),
                                                layer.modeOutputValues ()));
            }

            if (usesDropOut)
                layerData.back ().setDropOut (itDropOut);
        }

        if (usesDropOut)
            itDropOut += layer.numNodes ();

        size_t _numWeights = layer.numWeights (numNodesPrev);
        totalNumWeights += _numWeights;
        itWeight += _numWeights;
        itGradient += _numWeights;
        numNodesPrev = layer.numNodes ();
    }
    assert (totalNumWeights > 0);
    return layerPatternData;
}
template <typename LayerContainer>
void Net::forwardPattern (const LayerContainer& _layers,
                          std::vector<LayerData>& layerData) const
{
    size_t idxLayer = 0, idxLayerEnd = _layers.size ();
    for (; idxLayer < idxLayerEnd; ++idxLayer)
    {
        LayerData& prevLayerData = layerData.at (idxLayer);
        LayerData& currLayerData = layerData.at (idxLayer+1);

        forward (prevLayerData, currLayerData);
        // ...
    }
}
template <typename LayerContainer, typename LayerPatternContainer>
void Net::forwardBatch (const LayerContainer& _layers,
                        LayerPatternContainer& layerPatternData,
                        std::vector<double>& valuesMean,
                        std::vector<double>& valuesStdDev,
                        size_t trainFromLayer) const
{
    valuesMean.clear ();
    valuesStdDev.clear ();

    // loop over the layers and pass the data forward, pattern by pattern
    for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
    {
        bool doTraining = idxLayer >= trainFromLayer;

        // get the layer data of the previous and the current layer
        std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
        std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);

        size_t numPattern = prevLayerPatternData.size ();
        size_t numNodesLayer = _layers.at (idxLayer).numNodes ();

        std::vector<MeanVariance> means (numNodesLayer);
        // propagate the node values through the weights
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            forward (prevLayerData, currLayerData);
        }

        // apply the activation functions
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            // ...
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);
            // ...
        }
    }
    // ...
}
template <typename OutputContainer>
void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
{
    // ... (direct output mode)
    outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
    // ... (probability output mode)
    outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ());
    // ...
}


template <typename OutputContainer>
void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
{
    for (const LayerData& lastLayerData : lastLayerPatternData)
        fetchOutput (lastLayerData, outputContainer);
}
template <typename ItWeight>
std::tuple<double,double> Net::computeError (const Settings& settings,
                                             std::vector<LayerData>& lastLayerData,
                                             Batch& batch,
                                             ItWeight itWeightBegin,
                                             ItWeight itWeightEnd) const
{
    typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();
    // ...
    typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
    typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();

    double sumWeights (0.0);
    double sumError (0.0);

    size_t idxPattern = 0;
    // ...
    for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
    {
        // ...
        const Pattern& _pattern = (*itPattern);
        // compute the error of this pattern
        double error = errorFunction (/* ... */
                                      itWeightBegin, itWeightEnd,
                                      /* ... */);
        // ...
        sumWeights += fabs (_pattern.weight ());
        sumError += error;
    }
    // ...
    return std::make_tuple (sumError, sumWeights);
}
template <typename Settings>
void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                         const Settings& settings,
                         size_t trainFromLayer,
                         size_t totalNumWeights) const
{
    bool doTraining = layerPatternData.size () > trainFromLayer;
    if (doTraining) // training is necessary
    {
        // ------------- back-propagation, from the last layer backwards -------------
        size_t idxLayer = layerPatternData.size ();
        for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
             itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
        {
            --idxLayer;
            if (idxLayer <= trainFromLayer) // no training of the first layers
                break;

            std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
            std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);

            size_t idxPattern = 0;
            // ...
            for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
                     itPrevLayerData = begin (prevLayerDataColl);
                 itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
            {
                LayerData& currLayerData = (*itCurrLayerData);
                LayerData& prevLayerData = *(itPrevLayerData);

                backward (prevLayerData, currLayerData);
                // ...
            }
        }
    }
}
template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
                              ItWeight itWeightBegin, ItWeight itWeightEnd,
                              ItGradient itGradientBegin, ItGradient itGradientEnd,
                              size_t trainFromLayer,
                              OutContainer& outputContainer, bool doFetchOutput) const
{
    Settings& settings = std::get<0>(settingsAndBatch);
    Batch& batch = std::get<1>(settingsAndBatch);
    DropContainer& dropContainer = std::get<2>(settingsAndBatch);

    double sumError = 0.0;
    double sumWeights = 0.0;
    // ...

    // ------------- prepare the layer data -------------
    size_t totalNumWeights (0);
    std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
                                                                             batch,
                                                                             dropContainer,
                                                                             itWeightBegin,
                                                                             itWeightEnd,
                                                                             itGradientBegin,
                                                                             itGradientEnd,
                                                                             totalNumWeights);

    // ------------- forward propagation -------------
    std::vector<double> valuesMean;
    std::vector<double> valuesStdDev;
    forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);

    // ------------- fetch output -------------
    if (doFetchOutput)
    {
        fetchOutput (layerPatternData.back (), outputContainer);
    }

    // ------------- error computation -------------
    std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);

    // ------------- back-propagation -------------
    backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);

    // normalize the gradients by the batch size
    double batchSize = std::distance (std::begin (batch), std::end (batch));
    for (auto it = itGradientBegin; it != itGradientEnd; ++it)
        (*it) /= batchSize;

    sumError /= sumWeights;
    return sumError;
}
template <typename OutIterator>
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
{
    // ... (strategy: Gaussian weights with standard deviation sqrt(2/nIn))
    {
        // ...
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double stdDev = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
            }
            numInput = layer.numNodes ();
        }
        // ...
    }

    // ... (strategy: uniform weights in [-sqrt(2/nIn), sqrt(2/nIn)])
    {
        // ...
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double minVal = -sqrt (2.0/nIn);
            double maxVal = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
            }
            numInput = layer.numNodes ();
        }
        // ...
    }

    // ... (further strategies)
    {
        for (auto& layer: layers ())
        {
            // ...
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
            }
            numInput = layer.numNodes ();
        }
        // ...
    }

    // ...
    {
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
            }
            numInput = layer.numNodes ();
        }
    }
}
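
// Illustrative sketch (not part of the original TMVA code): the Gaussian strategy above draws
// each weight from a normal distribution with standard deviation sqrt(2/nIn), where nIn is the
// number of inputs of the layer, which keeps the variance of the activations roughly constant
// across layers.  Hypothetical helper; assumes <vector>, <random>, <cmath> and <cstddef>.
inline std::vector<double> initializeLayerWeightsSketch (std::size_t nIn, std::size_t nOut)
{
    std::mt19937 generator (std::random_device{}());
    std::normal_distribution<double> gauss (0.0, std::sqrt (2.0 / static_cast<double> (nIn)));
    std::vector<double> weights (nIn * nOut);
    for (double& w : weights)
        w = gauss (generator);
    return weights;
}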
template <typename Container, typename ItWeight>
double Net::errorFunction (LayerData& layerData,
                           Container truth,
                           ItWeight itWeight,
                           ItWeight itWeightEnd,
                           double patternWeight,
                           double factorWeightDecay,
                           EnumRegularization eRegularization) const
{
    double error (0);
    // ... (dispatch on m_eErrorFunction)

    // case: cross entropy
    {
        std::vector<double> probabilities = layerData.probabilities ();
        error = crossEntropy (begin (probabilities), end (probabilities),
                              begin (truth), end (truth),
                              /* ... */);
        // ...
    }

    // case: soft-max cross entropy (mutually exclusive classes)
    {
        std::cout << "softmax." << std::endl;
        // ...
        std::vector<double> probabilities = layerData.probabilities ();
        error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                     begin (truth), end (truth),
                                     /* ... */);
        // ...
    }

    // ...
    error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
    return error;
}