#ifndef TMVA_NEURAL_NET_I
#define TMVA_NEURAL_NET_I

#ifndef TMVA_NEURAL_NET
#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
#endif // TMVA_NEURAL_NET

#pragma GCC diagnostic ignored "-Wunused-variable"

// ...

namespace TMVA
{
namespace DNN
{
template <typename T>
T uniformFromTo (T from, T to)
{
    // uniformly distributed value in [from, to), based on std::rand
    return from + (rand () * (to - from) / RAND_MAX);
}
template <typename Container, typename T>
void uniformDouble (Container& container, T maxValue)
{
    for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
    {
        (*it) = uniformFromTo (-1.0 * maxValue, 1.0 * maxValue);
    }
}
// activation functions and their derivatives, shared across the net
extern std::shared_ptr<std::function<double(double)>> ZeroFnc;
// ...
extern std::shared_ptr<std::function<double(double)>> Sigmoid;
// ...
extern std::shared_ptr<std::function<double(double)>> Tanh;
extern std::shared_ptr<std::function<double(double)>> InvTanh;
// ...
extern std::shared_ptr<std::function<double(double)>> Linear;
// ...
extern std::shared_ptr<std::function<double(double)>> ReLU;
extern std::shared_ptr<std::function<double(double)>> InvReLU;
// ...
extern std::shared_ptr<std::function<double(double)>> Gauss;
// ...
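// Illustrative sketch (not from the original file): each activation function
// is paired with an "Inv" counterpart that yields its derivative. Note that
// applyFunctions below evaluates the derivative on the already-activated
// value, so an assumed pair (the real definitions live in the implementation
// file) could look like this:
//
//    std::shared_ptr<std::function<double(double)>> Sigmoid =
//        std::make_shared<std::function<double(double)>> (
//            [](double value){ return 1.0 / (1.0 + std::exp (-value)); });
//    std::shared_ptr<std::function<double(double)>> InvSigmoid =
//        std::make_shared<std::function<double(double)>> (
//            [](double value){ return value * (1.0 - value); }); // value = Sigmoid(x)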
/*! \brief apply the weights in forward direction, optionally respecting drop-out
 *
 * the drop-out iterator advances with the source nodes; a dropped source
 * node does not contribute to any target node
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
    {
        for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
        {
            if (!HasDropOut || *itDrop)
                (*itTarget) += (*itSource) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
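// Illustrative usage (not from the original file): a fully connected pass
// from a 2-node layer into a 3-node layer with plain vectors; the names are
// hypothetical. With HasDropOut == false the drop-out iterator is never
// dereferenced, so a null pointer is a safe placeholder:
//
//    std::vector<double> source = { 0.5, -1.0 };
//    std::vector<double> weights (2 * 3, 0.1);
//    std::vector<double> target (3, 0.0);
//    applyWeights<false> (begin (source), end (source), begin (weights),
//                         begin (target), end (target), (bool*) nullptr);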
/*! \brief backward application of the weights: propagate the deltas of the
 * current layer back to the previous layer
 *
 * the drop-out iterator advances with the previous-layer nodes
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
    {
        for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
        {
            if (!HasDropOut || *itDrop)
                (*itPrev) += (*itCurr) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
/*! \brief apply the activation function to the node values
 */
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);

        ++itValue;
    }
}
/*! \brief apply the activation function and store the gradient (the
 * derivative is evaluated on the already-activated value) for back-propagation
 */
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        (*itGradient) = (*invFnc.get ()) (value);

        ++itValue; ++itGradient;
    }
}
/*! \brief update the gradients
 */
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
            ++itTargetDelta; ++itTargetGradient; ++itGradient;
        }
        ++itSource;
    }
}
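// Illustrative note (not from the original file): for source node i and
// target node j the statement above accumulates
//
//    gradient[i*numTargets + j] -= delta[j] * source[i] * activationGradient[j]
//
// i.e. the outer product of the source values and the back-propagated deltas,
// scaled by the derivative of the activation function.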
/*! \brief compute the regularization term of the gradient (L1, L2)
 */
template <EnumRegularization Regularization>
inline double computeRegularization (double /*weight*/, const double& /*factorWeightDecay*/)
{
    return 0; // no regularization
}

// L1 regularization
template <>
inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
{
    return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
}

// L2 regularization
template <>
inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
{
    return factorWeightDecay * weight;
}
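// Illustrative note (not from the original file): with factorWeightDecay
// = 0.01, a weight of -0.5 yields computeRegularization<L1> = -0.01 (the
// decay factor carrying the sign of the weight), while
// computeRegularization<L2> = -0.005 (proportional to the weight itself).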
/*! \brief update the gradients, including the regularization term
 */
template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient,
             ItWeight itWeight,
             double weightDecay)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient)
                             + computeRegularization<Regularization> (*itWeight, weightDecay);
            ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
        }
        ++itSource;
    }
}
#define USELOCALWEIGHTS 1
/*! \brief steepest-gradient-descent algorithm with momentum
 */
template <typename Function, typename Weights, typename PassThrough>
double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
{
    size_t numWeights = weights.size ();
    // ... (prepare m_localWeights, m_localGradients and m_prevGradients)
    double E = 1e10;

    size_t currentRepetition = 0;
    while (currentRepetition < m_repetitions)
    {
        // apply the momentum: add the (decayed) previous gradients onto the
        // local weights before the fitness function is evaluated
        auto itPrevG = begin (m_prevGradients);
        auto itPrevGEnd = end (m_prevGradients);
        auto itLocWeight = begin (m_localWeights);
        for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
        {
            (*itPrevG) *= m_beta;
            (*itLocWeight) += (*itPrevG);
        }

        // evaluate the fitness function; it fills the local gradients
        E = fitnessFunction (passThrough, m_localWeights, m_localGradients);

        auto itG = begin (m_localGradients);
        auto itGEnd = end (m_localGradients);
        itPrevG = begin (m_prevGradients);
        double maxGrad = 0.0;
        for (; itG != itGEnd; ++itG, ++itPrevG)
        {
            double currGrad = (*itG);
            double prevGrad = (*itPrevG);
            currGrad *= m_alpha; // scale by the learning rate
            currGrad += prevGrad;
            (*itPrevG) = currGrad;
            if (std::fabs (currGrad) > maxGrad)
                maxGrad = currGrad;
        }

        if (maxGrad > 1)
        {
            // the gradients have become too large: reduce the learning rate
            // and rescale the weights to keep the step bounded
            m_alpha /= 2;
            std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
            std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
                           {
                               w /= maxGrad;
                           });
            m_prevGradients.clear ();
        }
        else
        {
            // add the accumulated gradients onto the weights
            auto itW = std::begin (weights);
            std::for_each (std::begin (m_prevGradients), std::end (m_prevGradients),
                           [&itW](double& g)
                           {
                               *itW += g;
                               ++itW;
                           });
        }

        ++currentRepetition;
    }
    return E;
}
/*! \brief sum-of-squares error function
 */
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd,
                     ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                     ItDelta itDelta, ItDelta itDeltaEnd,
                     InvFnc invFnc, double patternWeight)
{
    double errorSum = 0.0;

    // the truth iterator advances in lockstep with the output iterator
    ItTruth itTruth = itTruthBegin;
    bool hasDeltas = (itDelta != itDeltaEnd);
    for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
    {
        double output = (*itOutput);
        double error = output - (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
            ++itDelta;
        }
        errorSum += error * error * patternWeight;
    }

    return errorSum;
}
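// Illustrative note (not from the original file): for a single output node
// with output 0.8, truth 1.0 and patternWeight 1.0, the error contribution
// is (0.8 - 1.0)^2 = 0.04 and the stored delta is invFnc (0.8) * (0.8 - 1.0),
// i.e. the derivative of the activation at the output times the residual.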
/*! \brief cross-entropy error function
 */
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd,
                     ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                     ItDelta itDelta, ItDelta itDeltaEnd,
                     ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    bool hasDeltas = (itDelta != itDeltaEnd);

    double errorSum = 0.0;
    for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
    {
        double probability = *itProbability;
        double truth = *itTruthBegin;
        // clamp the truth towards the open interval (0,1)
        truth = truth < 0.5 ? 0.1 : 0.9;
        if (hasDeltas)
        {
            double delta = probability - truth;
            (*itDelta) = delta * patternWeight;
            ++itDelta;
        }
        double error (0);
        if (probability == 0) // protection against log (0)
        {
            if (truth >= 0.5)
                error += 1.0;
        }
        else if (probability == 1)
        {
            if (truth < 0.5)
                error += 1.0;
        }
        else
            error += - (truth * log (probability) + (1.0 - truth) * log (1.0 - probability));
        errorSum += error * patternWeight;
    }

    return errorSum;
}
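// Illustrative note (not from the original file): with patternWeight 1.0,
// probability 0.8 and truth 1.0 (clamped to 0.9 above), the stored delta is
// 0.8 - 0.9 = -0.1 and the error contribution is
// -(0.9 * log (0.8) + 0.1 * log (0.2)), about 0.36.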
/*! \brief soft-max-cross-entropy error function (for mutually exclusive classes)
 */
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd,
                            ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                            ItDelta itDelta, ItDelta itDeltaEnd,
                            ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    double errorSum = 0.0;

    bool hasDeltas = (itDelta != itDeltaEnd);
    // output - truth
    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        double probability = (*itProbability);
        double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;
            ++itDelta;
        }
        double error (0);
        error += truth * log (probability);
        errorSum += error;
    }

    return -errorSum * patternWeight;
}
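// Illustrative note (not from the original file): for mutually exclusive
// classes with truth (0, 1, 0) and soft-max probabilities (0.2, 0.7, 0.1),
// the deltas are simply probability - truth = (0.2, -0.3, 0.1) and the error
// is -log (0.7), about 0.357, scaled by the pattern weight.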
/*! \brief compute the weight decay for regularization (L1 or L2)
 */
template <typename ItWeight>
double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd,
                    double factorWeightDecay, EnumRegularization eRegularization)
{
    if (eRegularization == EnumRegularization::L1)
    {
        // L1 regularization: sum of |w|
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += std::fabs (weight);
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else if (eRegularization == EnumRegularization::L2)
    {
        // L2 regularization: sum of w^2
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += weight * weight;
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else
        return error;
}
/*! \brief apply the weights (and functions) in forward direction of the DNN
 */
template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                            currLayerData.weightsBegin (),
                            currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                            prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                             currLayerData.weightsBegin (),
                             currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                             &dummy); // dummy drop-out iterator, never dereferenced
    }
}
/*! \brief backward application of the weights (back-propagation of the error)
 */
template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                     currLayerData.weightsBegin (),
                                     prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                     prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                      currLayerData.weightsBegin (),
                                      prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                      &dummy); // dummy drop-out iterator, never dereferenced
    }
}
/*! \brief update the gradients of the given layer, dispatching on the
 * regularization mode
 */
template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData,
             double factorWeightDecay, EnumRegularization regularization)
{
    // if regularization is turned on, the gradient receives a regularization term
    if (factorWeightDecay != 0.0)
    {
        if (regularization == EnumRegularization::L1)
        {
            update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else if (regularization == EnumRegularization::L2)
        {
            update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else
        {
            update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                    currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                    currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
        }
    }
    else
    {
        update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
    }
}
/*! \brief set the drop-out configuration: rescale the weights by the
 * retention probability (or its inverse)
 */
template <typename WeightsType, typename DropProbabilities>
void Net::dropOutWeightFactor (WeightsType& weights,
                               const DropProbabilities& drops,
                               bool inverse)
{
    if (drops.empty () || weights.empty ())
        return;

    auto itWeight = std::begin (weights);
    auto itWeightEnd = std::end (weights);
    auto itDrop = std::begin (drops);
    auto itDropEnd = std::end (drops);
    size_t numNodesPrev = inputSize ();
    double dropFractionPrev = *itDrop;
    ++itDrop;

    for (auto& layer : layers ())
    {
        if (itDrop == itDropEnd)
            break;

        size_t _numNodes = layer.numNodes ();

        double dropFraction = *itDrop;
        double pPrev = 1.0 - dropFractionPrev;
        double p = 1.0 - dropFraction;
        p *= pPrev;
        if (inverse)
        {
            p = 1.0 / p;
        }
        size_t _numWeights = layer.numWeights (numNodesPrev);
        for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
        {
            if (itWeight == itWeightEnd)
                break;
            (*itWeight) *= p;
            ++itWeight;
        }
        numNodesPrev = _numNodes;
        dropFractionPrev = dropFraction;
        ++itDrop;
    }
}
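// Illustrative note (not from the original file): with a drop fraction of
// 0.5 on a layer, only half of its nodes are active during training, so for
// testing (all nodes active) the weights are scaled by the retention
// probability p = 1 - 0.5 = 0.5; calling the function with inverse = true
// multiplies by 1/p instead and restores the training weights.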
/*! \brief start the training
 */
template <typename Minimizer>
double Net::train (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer,
                   Settings& settings)
{
    // ...
    settings.create ("trainErrors", 100, 0, 100, 100, 0, 1);
    settings.create ("testErrors", 100, 0, 100, 100, 0, 1);

    size_t cycleCount = 0;
    size_t testCycleCount = 0;
    double testError = 1e20;
    double trainError = 1e20;
    size_t dropOutChangeCount = 0;

    DropContainer dropContainer;
    // ...
    const std::vector<double>& dropFractions = settings.dropFractions ();
    bool isWeightsForDrop = false;

    // until convergence
    while (true)
    {
        ++cycleCount;
        // ...

        // if drop-out is turned on, the drop-out configuration of the nodes
        // is renewed every settings.dropRepetitions () cycles
        size_t dropIndex = 0;
        if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
        {
            // fill the drop-out container
            dropContainer.clear ();
            size_t _numNodes = inputSize ();
            double dropFraction = 0.0;
            dropFraction = dropFractions.at (dropIndex);
            ++dropIndex;
            fillDropContainer (dropContainer, dropFraction, _numNodes);
            for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers);
                 itLayer != itLayerEnd; ++itLayer, ++dropIndex)
            {
                auto& layer = *itLayer;
                _numNodes = layer.numNodes ();
                // how many nodes have to be dropped
                dropFraction = 0.0;
                if (dropFractions.size () > dropIndex)
                    dropFraction = dropFractions.at (dropIndex);
                fillDropContainer (dropContainer, dropFraction, _numNodes);
            }
            isWeightsForDrop = true;
        }

        // execute one training cycle
        trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);

        // ------ check if we have to execute a test ------------------
        bool hasConverged = false;
        if (testCycleCount % settings.testRepetitions () == 0)
        {
            // ...
            if (isWeightsForDrop)
            {
                dropOutWeightFactor (weights, dropFractions);
                isWeightsForDrop = false;
            }
            testError = 0;
            // ...
            if (settings.useMultithreading ())
            {
                // run the test computations multi-threaded
                size_t numThreads = std::thread::hardware_concurrency ();
                size_t patternPerThread = testPattern.size () / numThreads;
                std::vector<Batch> batches;
                auto itPat = testPattern.begin ();
                // distribute the test patterns into batches, one per thread
                for (size_t idxThread = 0; idxThread < numThreads - 1; ++idxThread)
                {
                    batches.push_back (Batch (itPat, itPat + patternPerThread));
                    itPat += patternPerThread;
                }
                if (itPat != testPattern.end ())
                    batches.push_back (Batch (itPat, testPattern.end ()));

                // create one std::future for each batch
                std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
                for (auto& batch : batches)
                {
                    // execute each of the batches in a separate thread
                    futures.push_back (std::async (std::launch::async, [&]()
                    {
                        std::vector<double> localOutput;
                        // ... (assemble the pass-through tuple for this batch)
                        double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
                        return std::make_tuple (testBatchError, localOutput);
                    }));
                }

                auto itBatch = batches.begin ();
                for (auto& f : futures)
                {
                    std::tuple<double,std::vector<double>> result = f.get ();
                    testError += std::get<0>(result) / batches.size ();
                    std::vector<double> output = std::get<1>(result);
                    // ...
                    auto output_iterator = output.begin ();
                    for (auto pattern_it = itBatch->begin (); pattern_it != itBatch->end (); ++pattern_it)
                    {
                        for (size_t output_index = 1; output_index < outputSize (); ++output_index)
                        {
                            settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                 (*pattern_it).weight ());
                            ++output_iterator;
                        }
                    }
                    ++itBatch;
                }
            }
            else
            {
                // run the test computations single-threaded
                std::vector<double> output;
                // ...
                Batch batch (begin (testPattern), end (testPattern));
                // ... (compute testPatternError by running the net on the batch and fill 'output')
                auto output_iterator = output.begin ();
                for (auto pattern_it = batch.begin (); pattern_it != batch.end (); ++pattern_it)
                {
                    for (size_t output_index = 1; output_index < outputSize (); ++output_index)
                    {
                        settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                             (*pattern_it).weight ());
                        ++output_iterator;
                    }
                }
                testError += testPatternError;
            }
            // ... (convergence check via settings.hasConverged (testError))
        }

        // if the weights were scaled for testing, they have to be scaled
        // back (inverse) for the next training cycle
        if (!hasConverged && !isWeightsForDrop)
        {
            dropOutWeightFactor (weights, dropFractions, true); // inverse
            isWeightsForDrop = true;
        }

        ++dropOutChangeCount;
        static double x = -1.0;
        x += 1.0;
        // ...
        settings.addPoint ("trainErrors", cycleCount, trainError);
        settings.addPoint ("testErrors", cycleCount, testError);
        settings.plot ("trainErrors", "C", 1, kBlue);
        // ...

        // set up the progress/status text
        if ((int)cycleCount % 10 == 0)
        {
            TString convText = Form ("(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
                                     trainError,
                                     testError,
                                     (int)cycleCount,
                                     (int)settings.convergenceCount (),
                                     (int)settings.maxConvergenceCount ());
            // ... (compute 'progress')
            settings.cycle (progress, convText);
        }

        if (hasConverged)
            break;
        // ...
    }

    // final status output
    TString convText = Form ("(train/test/epoch): %.4g/%.4g/%d",
                             trainError,
                             testError,
                             (int)cycleCount);
    // ... (compute 'progress')
    settings.cycle (progress, convText);
    // ...
    return testError;
}
/*! \brief execute one training cycle
 */
template <typename Iterator, typename Minimizer>
double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                        Iterator itPatternBegin, Iterator itPatternEnd,
                        Settings& settings, DropContainer& dropContainer)
{
    double error = 0.0;
    size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
    size_t numBatches = numPattern / settings.batchSize ();
    size_t numBatches_stored = numBatches;

    std::shuffle (itPatternBegin, itPatternEnd, std::default_random_engine{});
    Iterator itPatternBatchBegin = itPatternBegin;
    Iterator itPatternBatchEnd = itPatternBatchBegin;

    // create the batches
    std::vector<Batch> batches;
    while (numBatches > 0)
    {
        std::advance (itPatternBatchEnd, settings.batchSize ());
        batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
        itPatternBatchBegin = itPatternBatchEnd;
        --numBatches;
    }

    // add the remaining patterns to a final batch
    if (itPatternBatchEnd != itPatternEnd)
        batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));

    if (settings.useMultithreading ())
    {
        // how many batches can be done in parallel
        size_t numThreads = std::thread::hardware_concurrency ();
        size_t batchesPerThread = batches.size () / numThreads;
        typedef std::vector<Batch>::iterator batch_iterator;
        std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
        batch_iterator itBatchBegin = std::begin (batches);
        batch_iterator itBatchCurrEnd = std::begin (batches);
        batch_iterator itBatchEnd = std::end (batches);
        for (size_t iT = 0; iT < numThreads; ++iT)
        {
            if (iT == numThreads - 1)
                itBatchCurrEnd = itBatchEnd;
            else
                std::advance (itBatchCurrEnd, batchesPerThread);
            batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
            itBatchBegin = itBatchCurrEnd;
        }

        // create one std::future for each batch range
        std::vector<std::future<double>> futures;
        for (auto& batchRange : batchVec)
        {
            // execute each batch range on a separate thread
            futures.push_back (std::async (std::launch::async, [&]()
            {
                double localError = 0.0;
                for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
                {
                    Batch& batch = *it;
                    std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
                    Minimizer minimizerClone (minimizer);
                    localError += minimizerClone ((*this), weights, settingsAndBatch);
                }
                return localError;
            }));
        }

        for (auto& f : futures)
            error += f.get ();
    }
    else
    {
        for (auto& batch : batches)
        {
            std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
            error += minimizer ((*this), weights, settingsAndBatch);
        }
    }

    numBatches_stored = std::max (numBatches_stored, size_t (1));
    error /= numBatches_stored;
    // ...
    return error;
}
/*! \brief compute the net with the given input and the given weights
 */
template <typename Weights>
std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
{
    std::vector<LayerData> layerData;
    layerData.reserve (m_layers.size () + 1);
    auto itWeight = begin (weights);
    auto itInputBegin = begin (input);
    auto itInputEnd = end (input);
    layerData.push_back (LayerData (itInputBegin, itInputEnd));
    size_t numNodesPrev = input.size ();

    // prepare one LayerData per layer
    for (auto& layer : m_layers)
    {
        layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                        layer.activationFunction (),
                                        layer.modeOutputValues ()));
        size_t _numWeights = layer.numWeights (numNodesPrev);
        itWeight += _numWeights;
        numNodesPrev = layer.numNodes ();
    }

    // forward propagation
    forwardPattern (m_layers, layerData);

    // fetch the output of the last layer
    std::vector<double> output;
    fetchOutput (layerData.back (), output);
    return output;
}
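// Illustrative usage (not from the original file): once a Net has been
// configured and trained, compute() runs a single pattern through it; the
// setup names below are hypothetical:
//
//    TMVA::DNN::Net net;                    // configured elsewhere
//    std::vector<double> weights = /* trained weights */ {};
//    std::vector<double> input = { 0.3, 1.2, -0.7 };
//    std::vector<double> output = net.compute (input, weights);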
/*! \brief execute the DNN for one mini-batch; no gradients, no output fetching
 */
template <typename Weights, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
{
    std::vector<double> nothing; // empty gradients: no back-propagation is done, just forward
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false);
    return error;
}
/*! \brief execute the DNN for one mini-batch and fetch the output; no gradients
 */
template <typename Weights, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
{
    std::vector<double> nothing; // empty gradients: no back-propagation is done, just forward
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
    return error;
}
/*! \brief execute the DNN for one mini-batch and compute the gradients
 */
template <typename Weights, typename Gradients, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
{
    std::vector<double> nothing;
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
    return error;
}
/*! \brief execute the DNN for one mini-batch, compute the gradients and fetch the output
 */
template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients,
                        ModeOutput /*eFetch*/, OutContainer& outputContainer) const
{
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
    return error;
}
/*! \brief prepare the layer data for all patterns of one batch
 */
template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
                                                           Batch& batch,
                                                           const DropContainer& dropContainer,
                                                           ItWeight itWeightBegin,
                                                           ItWeight /*itWeightEnd*/,
                                                           ItGradient itGradientBegin,
                                                           ItGradient itGradientEnd,
                                                           size_t& totalNumWeights) const
{
    bool usesDropOut = !dropContainer.empty ();
    LayerData::const_dropout_iterator itDropOut;
    if (usesDropOut)
        itDropOut = std::begin (dropContainer);

    if (_layers.empty ())
        throw std::string ("no layers in this net");

    // ----------- create the layer data --------------------------------
    totalNumWeights = 0;
    size_t totalNumNodes = 0;
    std::vector<std::vector<LayerData>> layerPatternData;
    layerPatternData.reserve (_layers.size () + 1);
    ItWeight itWeight = itWeightBegin;
    ItGradient itGradient = itGradientBegin;
    size_t numNodesPrev = inputSize ();
    typename Pattern::const_iterator itInputBegin;
    typename Pattern::const_iterator itInputEnd;
    // ...

    // --------- prepare the layer data for the input layer -------------
    layerPatternData.push_back (std::vector<LayerData>());
    for (const Pattern& _pattern : batch)
    {
        std::vector<LayerData>& layerData = layerPatternData.back ();
        layerData.push_back (LayerData (numNodesPrev));

        itInputBegin = _pattern.beginInput ();
        itInputEnd = _pattern.endInput ();
        layerData.back ().setInput (itInputBegin, itInputEnd);

        if (usesDropOut)
            layerData.back ().setDropOut (itDropOut);
    }
    // ...
    if (usesDropOut)
        itDropOut += _layers.back ().numNodes ();

    // --------- prepare the layer data for the following layers --------
    for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
    {
        bool isOutputLayer = (itLayer + 1 == itLayerEnd);
        bool isFirstHiddenLayer = (itLayer == begin (_layers));

        auto& layer = *itLayer;
        layerPatternData.push_back (std::vector<LayerData>());
        // for each pattern, prepare a LayerData
        for (const Pattern& _pattern : batch)
        {
            std::vector<LayerData>& layerData = layerPatternData.back ();

            if (itGradientBegin == itGradientEnd)
            {
                // no gradients requested: forward-only layer data
                layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                                layer.activationFunction (),
                                                layer.modeOutputValues ()));
            }
            else
            {
                // layer data with gradients for back-propagation
                layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
                                                layer.activationFunction (),
                                                layer.inverseActivationFunction (),
                                                layer.modeOutputValues ()));
            }

            if (usesDropOut)
                layerData.back ().setDropOut (itDropOut);
        }
        // ...
        if (usesDropOut)
            itDropOut += layer.numNodes ();

        size_t _numWeights = layer.numWeights (numNodesPrev);
        totalNumWeights += _numWeights;
        itWeight += _numWeights;
        itGradient += _numWeights;
        numNodesPrev = layer.numNodes ();
        totalNumNodes += numNodesPrev;
    }
    assert (totalNumWeights > 0);
    return layerPatternData;
}
/*! \brief forward propagation of a single pattern
 */
template <typename LayerContainer>
void Net::forwardPattern (const LayerContainer& _layers,
                          std::vector<LayerData>& layerData) const
{
    size_t idxLayer = 0, idxLayerEnd = _layers.size ();
    size_t cumulativeNodeCount = 0;
    for (; idxLayer < idxLayerEnd; ++idxLayer)
    {
        LayerData& prevLayerData = layerData.at (idxLayer);
        LayerData& currLayerData = layerData.at (idxLayer + 1);

        forward (prevLayerData, currLayerData);

        applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                        currLayerData.activationFunction ());
    }
}
/*! \brief forward propagation of all patterns of one batch
 */
template <typename LayerContainer, typename LayerPatternContainer>
void Net::forwardBatch (const LayerContainer& _layers,
                        LayerPatternContainer& layerPatternData,
                        std::vector<double>& valuesMean,
                        std::vector<double>& valuesStdDev,
                        size_t trainFromLayer) const
{
    valuesMean.clear ();
    valuesStdDev.clear ();

    // ----------- forward ----------------------------------------------
    size_t cumulativeNodeCount = 0;
    for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd - 1; ++idxLayer)
    {
        bool doTraining = idxLayer >= trainFromLayer;

        // layer-pattern data of this layer and of the next one
        std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
        std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer + 1);

        size_t numPattern = prevLayerPatternData.size ();
        size_t numNodesLayer = _layers.at (idxLayer).numNodes ();

        std::vector<MeanVariance> means (numNodesLayer);
        // feed forward for each pattern
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            forward (prevLayerData, currLayerData);
        }

        // apply the activation functions for each pattern
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            if (doTraining)
                applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                                currLayerData.activationFunction (),
                                currLayerData.inverseActivationFunction (),
                                currLayerData.valueGradientsBegin ());
            else
                applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                                currLayerData.activationFunction ());
        }

        cumulativeNodeCount += numNodesLayer;
    }
}
/*! \brief fetch the output of one layer-data object
 */
template <typename OutputContainer>
void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
{
    ModeOutputValues eModeOutput = lastLayerData.outputMode ();
    if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
    {
        // plain node values
        outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
    }
    else
    {
        // probabilities computed from the node values (sigmoid or soft-max)
        auto prob = lastLayerData.probabilities ();
        outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ());
    }
}
/*! \brief fetch the output for all patterns of one batch
 */
template <typename OutputContainer>
void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
{
    for (const LayerData& lastLayerData : lastLayerPatternData)
        fetchOutput (lastLayerData, outputContainer);
}
/*! \brief compute the error (and the deltas) for one batch
 */
template <typename ItWeight>
std::tuple<double, double> Net::computeError (const Settings& settings,
                                              std::vector<LayerData>& lastLayerData,
                                              Batch& batch,
                                              ItWeight itWeightBegin,
                                              ItWeight itWeightEnd) const
{
    typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();

    typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
    typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();

    double sumWeights (0.0);
    double sumError (0.0);

    size_t idxPattern = 0;
    // loop over the patterns of this batch
    for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
    {
        ++idxPattern;

        // compute the error and the deltas between computed and true output
        LayerData& layerData = (*itLayerData);
        const Pattern& _pattern = (*itPattern);
        double error = errorFunction (layerData, _pattern.output (),
                                      itWeightBegin, itWeightEnd,
                                      _pattern.weight (),
                                      settings.factorWeightDecay (),
                                      settings.regularization ());
        sumWeights += fabs (_pattern.weight ());
        sumError += error;
    }
    return std::make_tuple (sumError, sumWeights);
}
/*! \brief back-propagate the deltas and update the gradients
 */
template <typename Settings>
void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                         const Settings& settings,
                         size_t trainFromLayer,
                         size_t totalNumWeights) const
{
    bool doTraining = layerPatternData.size () > trainFromLayer;
    if (!doTraining)
        return;

    // ------------- back propagation ------------------------------------
    size_t idxLayer = layerPatternData.size ();
    for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
         itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
    {
        --idxLayer;
        if (idxLayer <= trainFromLayer) // no training below this layer
            break;

        std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
        std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData + 1);

        size_t idxPattern = 0;
        // loop over the patterns of this batch
        for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
                 itPrevLayerData = begin (prevLayerDataColl);
             itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
        {
            LayerData& currLayerData = (*itCurrLayerData);
            LayerData& prevLayerData = *(itPrevLayerData);

            backward (prevLayerData, currLayerData);

            // the factorWeightDecay has to be scaled by 1/n, where n is the
            // number of weights
            update (prevLayerData, currLayerData,
                    settings.factorWeightDecay () / totalNumWeights,
                    settings.regularization ());
        }
    }
}
/*! \brief main NN computation function
 */
template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
                              ItWeight itWeightBegin, ItWeight itWeightEnd,
                              ItGradient itGradientBegin, ItGradient itGradientEnd,
                              size_t trainFromLayer,
                              OutContainer& outputContainer, bool doFetchOutput) const
{
    Settings& settings = std::get<0>(settingsAndBatch);
    Batch& batch = std::get<1>(settingsAndBatch);
    DropContainer& dropContainer = std::get<2>(settingsAndBatch);

    double sumError = 0.0;
    double sumWeights = 0.0;

    // ----------------------------- prepare the layer data ---------------
    size_t totalNumWeights (0);
    std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
                                                                             batch,
                                                                             dropContainer,
                                                                             itWeightBegin,
                                                                             itWeightEnd,
                                                                             itGradientBegin,
                                                                             itGradientEnd,
                                                                             totalNumWeights);

    // ---------------------------------- propagate forward ---------------
    std::vector<double> valuesMean;
    std::vector<double> valuesStdDev;
    forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);

    // ------------- fetch output ------------------------------------------
    if (doFetchOutput)
    {
        fetchOutput (layerPatternData.back (), outputContainer);
    }

    // ------------- error computation --------------------------------------
    std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);

    // ------------- back propagation ----------------------------------------
    backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);

    // --- compile the measures
    double batchSize = std::distance (std::begin (batch), std::end (batch));
    for (auto it = itGradientBegin; it != itGradientEnd; ++it)
        (*it) /= batchSize;

    sumError /= sumWeights;
    return sumError;
}
/*! \brief initialize the weights with the given strategy
 */
template <typename OutIterator>
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
{
    if (eInitStrategy == WeightInitializationStrategy::XAVIER)
    {
        // input and output properties
        int numInput = inputSize ();
        // ...
        for (auto& layer : layers ())
        {
            double nIn = numInput;
            double stdDev = sqrt (2.0 / nIn); // factor 2.0 for ReLU
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = gaussDouble (0.0, stdDev);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
    {
        int numInput = inputSize ();
        // ...
        for (auto& layer : layers ())
        {
            double nIn = numInput;
            double minVal = -sqrt (2.0 / nIn);
            double maxVal = sqrt (2.0 / nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = uniformDouble (minVal, maxVal);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::TEST)
    {
        int numInput = inputSize ();
        // ...
        for (auto& layer : layers ())
        {
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ... (fixed test distribution)
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
    {
        int numInput = inputSize ();
        // ...
        for (auto& layer : layers ())
        {
            double nIn = numInput;
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ... (distribution scaled by the layer size, using nIn)
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }
}
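// Illustrative usage (not from the original file): initializing the weight
// vector of a configured net with the Xavier strategy; the setup names are
// hypothetical:
//
//    std::vector<double> weights (net.numWeights ());
//    net.initializeWeights (WeightInitializationStrategy::XAVIER,
//                           std::begin (weights));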
/*! \brief compute the error of the DNN for the chosen error function
 */
template <typename Container, typename ItWeight>
double Net::errorFunction (LayerData& layerData,
                           Container truth,
                           ItWeight itWeight,
                           ItWeight itWeightEnd,
                           double patternWeight,
                           double factorWeightDecay,
                           EnumRegularization eRegularization) const
{
    double error (0);
    switch (m_eErrorFunction)
    {
    case ModeErrorFunction::SUMOFSQUARES:
    {
        error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (),
                              begin (truth), end (truth),
                              layerData.deltasBegin (), layerData.deltasEnd (),
                              layerData.inverseActivationFunction (),
                              patternWeight);
        break;
    }
    case ModeErrorFunction::CROSSENTROPY:
    {
        std::vector<double> probabilities = layerData.probabilities ();
        error = crossEntropy (begin (probabilities), end (probabilities),
                              begin (truth), end (truth),
                              layerData.deltasBegin (), layerData.deltasEnd (),
                              layerData.inverseActivationFunction (),
                              patternWeight);
        break;
    }
    case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
    {
        std::cout << "softmax." << std::endl;
        std::vector<double> probabilities = layerData.probabilities ();
        error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                     begin (truth), end (truth),
                                     layerData.deltasBegin (), layerData.deltasEnd (),
                                     layerData.inverseActivationFunction (),
                                     patternWeight);
        break;
    }
    }
    if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
    {
        error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
    }
    return error;
}

} // namespace DNN
} // namespace TMVA

#endif // TMVA_NEURAL_NET_I