// Local shorthand for the matrix type exposed by Architecture_t.
47 using Matrix_t =
typename Architecture_t::Matrix_t;
// Local shorthand for the scalar type exposed by Architecture_t.
48 using Scalar_t =
typename Architecture_t::Scalar_t;
// Declaration of the per-layer weight update step.
//   layerIndex      - index selecting the layer whose optimizer state is used.
//   weights         - weight matrices of that layer; passed by non-const reference.
//   weightGradients - gradients matching `weights`; read-only.
68 void UpdateWeights(
size_t layerIndex, std::vector<Matrix_t> &weights,
const std::vector<Matrix_t> &weightGradients);
// Declaration of the per-layer bias update step.
//   layerIndex    - index selecting the layer whose optimizer state is used.
//   biases        - bias matrices of that layer; passed by non-const reference.
//   biasGradients - gradients matching `biases`; read-only.
71 void UpdateBiases(
size_t layerIndex, std::vector<Matrix_t> &biases,
const std::vector<Matrix_t> &biasGradients);
// NOTE(review): this is an excerpt of a larger function; the enclosing
// signature, the loop bodies and the closing braces are not visible here.
// Take the network's layer list by reference and record how many layers it has.
106 std::vector<Layer_t *> &layers = deepNet.GetLayers();
107 const size_t layersNSlices = layers.size();
// Visit every layer by index.
117 for (
size_t i = 0;
i < layersNSlices;
i++) {
// Number of entries returned by GetWeights() for layer i.
118 const size_t weightsNSlices = (layers[
i]->GetWeights()).size();
// One iteration per weight entry of layer i (body not part of this excerpt).
123 for (
size_t j = 0; j < weightsNSlices; j++) {
// Number of entries returned by GetBiases() for layer i.
128 const size_t biasesNSlices = (layers[
i]->GetBiases()).size();
// One iteration per bias entry of layer i (body not part of this excerpt).
133 for (
size_t j = 0; j < biasesNSlices; j++) {
// Tail of a function signature with trailing return type `void`.
// NOTE(review): the start of the signature is outside this excerpt; the
// parameter name and the statements below suggest this is the definition of
// UpdateWeights - confirm. The locals `accumulation`, `dummy1`,
// `currentSquaredWeightGradients`, `currentWeightUpdates`,
// `currentSquaredWeightUpdates` and the two `currentLayerPast...` containers
// are declared on lines that are not visible here.
148 const std::vector<Matrix_t> &weightGradients) ->
void
// Number of weight matrices to process; the stored squared-gradient history
// must hold exactly one entry per weight matrix.
153 const size_t weightsNSlices = weights.size();
154 assert(currentLayerPastSquaredWeightGradients.size() == weightsNSlices);
// Process each weight matrix independently.
156 for (
size_t i = 0;
i < weightsNSlices;
i++) {
// Step 1: refresh the stored squared-gradient history for matrix i.
// The current gradient is copied and squared element-wise, then the old
// history (factor rho) and the squared gradient (factor 1 - rho) are both
// scale-added into `accumulation`, which is finally written back as the
// new history.
// NOTE(review): presumably `accumulation` is zeroed before these calls and
// ScaleAdd(A, B, c) computes A += c * B - confirm against the architecture.
164 Architecture_t::Copy(currentSquaredWeightGradients, weightGradients[
i]);
165 Architecture_t::SquareElementWise(currentSquaredWeightGradients);
166 Architecture_t::ScaleAdd(accumulation, currentLayerPastSquaredWeightGradients[
i], this->
GetRho());
167 Architecture_t::ScaleAdd(accumulation, currentSquaredWeightGradients, 1 - (this->
GetRho()));
168 Architecture_t::Copy(currentLayerPastSquaredWeightGradients[
i], accumulation);
// Step 2: dummy1 = sqrt(stored squared-update history + epsilon).
// This uses the history from before this iteration's update is folded in
// (that happens in step 5).
176 Architecture_t::Copy(dummy1, currentLayerPastSquaredWeightUpdates[
i]);
177 Architecture_t::ConstAdd(dummy1, this->
GetEpsilon());
178 Architecture_t::SqrtElementWise(dummy1);
// Step 3: build the update for matrix i. Start from the freshly stored
// squared-gradient history, add epsilon, take sqrt and reciprocal, then
// multiply by the gradient and by dummy1.
// NOTE(review): assumes Hadamard(A, B) is an in-place element-wise product
// A *= B - confirm.
182 Architecture_t::Copy(currentWeightUpdates, currentLayerPastSquaredWeightGradients[
i]);
183 Architecture_t::ConstAdd(currentWeightUpdates, this->
GetEpsilon());
184 Architecture_t::SqrtElementWise(currentWeightUpdates);
185 Architecture_t::ReciprocalElementWise(currentWeightUpdates);
186 Architecture_t::Hadamard(currentWeightUpdates, weightGradients[
i]);
187 Architecture_t::Hadamard(currentWeightUpdates, dummy1);
// Step 4: apply the update to the weights, scaled by the negated learning rate.
190 Architecture_t::ScaleAdd(weights[
i], currentWeightUpdates, -this->GetLearningRate());
// Step 5: refresh the stored squared-update history with the update just
// applied, using the same rho / (1 - rho) mixing as in step 1.
// NOTE(review): `accumulation` is reused here; whether it is reset between
// step 1 and this step is not visible in this excerpt - confirm.
196 Architecture_t::Copy(currentSquaredWeightUpdates, currentWeightUpdates);
197 Architecture_t::SquareElementWise(currentSquaredWeightUpdates);
198 Architecture_t::ScaleAdd(accumulation, currentLayerPastSquaredWeightUpdates[
i], this->
GetRho());
199 Architecture_t::ScaleAdd(accumulation, currentSquaredWeightUpdates, 1 - (this->
GetRho()));
200 Architecture_t::Copy(currentLayerPastSquaredWeightUpdates[
i], accumulation);
// Tail of a function signature with trailing return type `void`.
// NOTE(review): the start of the signature is outside this excerpt; the
// parameter name and the statements below suggest this is the definition of
// UpdateBiases - confirm. The locals `accumulation`, `dummy1`,
// `currentSquaredBiasGradients`, `currentBiasUpdates`,
// `currentSquaredBiasUpdates` and the two `currentLayerPast...` containers
// are declared on lines that are not visible here.
207 const std::vector<Matrix_t> &biasGradients) ->
void
// Number of bias matrices to process; the stored squared-gradient history
// must hold exactly one entry per bias matrix.
212 const size_t biasesNSlices = biases.size();
213 assert(currentLayerPastSquaredBiasGradients.size() == biasesNSlices);
// Process each bias matrix independently.
214 for (
size_t i = 0;
i < biasesNSlices;
i++) {
// Step 1: refresh the stored squared-gradient history for bias matrix i.
// The current gradient is copied and squared element-wise, then the old
// history (factor rho) and the squared gradient (factor 1 - rho) are both
// scale-added into `accumulation`, which is written back as the new history.
// NOTE(review): presumably `accumulation` is zeroed before these calls and
// ScaleAdd(A, B, c) computes A += c * B - confirm against the architecture.
223 Architecture_t::Copy(currentSquaredBiasGradients, biasGradients[
i]);
224 Architecture_t::SquareElementWise(currentSquaredBiasGradients);
225 Architecture_t::ScaleAdd(accumulation, currentLayerPastSquaredBiasGradients[
i], this->
GetRho());
226 Architecture_t::ScaleAdd(accumulation, currentSquaredBiasGradients, 1 - (this->
GetRho()));
227 Architecture_t::Copy(currentLayerPastSquaredBiasGradients[
i], accumulation);
// Step 2: dummy1 = sqrt(stored squared-update history + epsilon).
// This uses the history from before this iteration's update is folded in
// (that happens in step 5).
234 Architecture_t::Copy(dummy1, currentLayerPastSquaredBiasUpdates[
i]);
235 Architecture_t::ConstAdd(dummy1, this->
GetEpsilon());
236 Architecture_t::SqrtElementWise(dummy1);
// Step 3: build the update for bias matrix i. Start from the freshly stored
// squared-gradient history, add epsilon, take sqrt and reciprocal, then
// multiply by the gradient and by dummy1.
// NOTE(review): assumes Hadamard(A, B) is an in-place element-wise product
// A *= B - confirm.
239 Architecture_t::Copy(currentBiasUpdates, currentLayerPastSquaredBiasGradients[
i]);
240 Architecture_t::ConstAdd(currentBiasUpdates, this->
GetEpsilon());
241 Architecture_t::SqrtElementWise(currentBiasUpdates);
242 Architecture_t::ReciprocalElementWise(currentBiasUpdates);
243 Architecture_t::Hadamard(currentBiasUpdates, biasGradients[
i]);
244 Architecture_t::Hadamard(currentBiasUpdates, dummy1);
// Step 4: apply the update to the biases, scaled by the negated learning rate.
247 Architecture_t::ScaleAdd(biases[
i], currentBiasUpdates, -this->GetLearningRate());
// Step 5: refresh the stored squared-update history with the update just
// applied, using the same rho / (1 - rho) mixing as in step 1.
// NOTE(review): `accumulation` is reused here; whether it is reset between
// step 1 and this step is not visible in this excerpt - confirm.
254 Architecture_t::Copy(currentSquaredBiasUpdates, currentBiasUpdates);
255 Architecture_t::SquareElementWise(currentSquaredBiasUpdates);
256 Architecture_t::ScaleAdd(accumulation, currentLayerPastSquaredBiasUpdates[
i], this->
GetRho());
257 Architecture_t::ScaleAdd(accumulation, currentSquaredBiasUpdates, 1 - (this->
GetRho()));
258 Architecture_t::Copy(currentLayerPastSquaredBiasUpdates[
i], accumulation);