43template <
typename Architecture_t,
typename Layer_t = VGeneralLayer<Architecture_t>,
44 typename DeepNet_t = TDeepNet<Architecture_t, Layer_t>>
47 using Matrix_t =
typename Architecture_t::Matrix_t;
48 using Scalar_t =
typename Architecture_t::Scalar_t;
52 std::vector<std::vector<Matrix_t>>
54 std::vector<std::vector<Matrix_t>>
58 void UpdateWeights(
size_t layerIndex, std::vector<Matrix_t> &weights,
const std::vector<Matrix_t> &weightGradients);
61 void UpdateBiases(
size_t layerIndex, std::vector<Matrix_t> &biases,
const std::vector<Matrix_t> &biasGradients);
84template <
typename Architecture_t,
typename Layer_t,
typename DeepNet_t>
86 :
VOptimizer<Architecture_t, Layer_t, DeepNet_t>(learningRate, deepNet), fMomentum(momentum)
88 std::vector<Layer_t *> &layers = deepNet.
GetLayers();
89 size_t layersNSlices = layers.size();
93 for (
size_t i = 0; i < layersNSlices; i++) {
94 size_t weightsNSlices = (layers[i]->GetWeights()).size();
96 for (
size_t j = 0; j < weightsNSlices; j++) {
97 Matrix_t &currentWeights = layers[i]->GetWeightsAt(j);
98 size_t weightsNRows = currentWeights.GetNrows();
99 size_t weightsNCols = currentWeights.GetNcols();
105 size_t biasesNSlices = (layers[i]->GetBiases()).size();
107 for (
size_t j = 0; j < biasesNSlices; j++) {
108 Matrix_t &currentBiases = layers[i]->GetBiasesAt(j);
109 size_t biasesNRows = currentBiases.GetNrows();
110 size_t biasesNCols = currentBiases.GetNcols();
121template <
typename Architecture_t,
typename Layer_t,
typename DeepNet_t>
123 const std::vector<Matrix_t> &weightGradients) ->
void
128 std::vector<Matrix_t> &currentLayerPastWeightGradients = this->GetPastWeightGradientsAt(layerIndex);
130 for (
size_t k = 0; k < currentLayerPastWeightGradients.size(); k++) {
131 Architecture_t::ConstMult(currentLayerPastWeightGradients[k], this->GetMomentum());
132 Architecture_t::ScaleAdd(currentLayerPastWeightGradients[k], weightGradients[k], 1.0);
137 for (
size_t i = 0; i < weights.size(); i++) {
138 Architecture_t::ScaleAdd(weights[i], currentLayerPastWeightGradients[i], -this->GetLearningRate());
143template <
typename Architecture_t,
typename Layer_t,
typename DeepNet_t>
145 const std::vector<Matrix_t> &biasGradients) ->
void
150 std::vector<Matrix_t> &currentLayerPastBiasGradients = this->GetPastBiasGradientsAt(layerIndex);
152 for (
size_t k = 0; k < currentLayerPastBiasGradients.size(); k++) {
153 Architecture_t::ConstMult(currentLayerPastBiasGradients[k], this->GetMomentum());
154 Architecture_t::ScaleAdd(currentLayerPastBiasGradients[k], biasGradients[k], 1.0);
159 for (
size_t i = 0; i < biases.size(); i++) {
160 Architecture_t::ScaleAdd(biases[i], currentLayerPastBiasGradients[i], -this->GetLearningRate());
Stochastic Batch Gradient Descent Optimizer class.
void UpdateWeights(size_t layerIndex, std::vector< Matrix_t > &weights, const std::vector< Matrix_t > &weightGradients)
Update the weights, given the current weight gradients.
~TSGD()=default
Destructor.
Scalar_t fMomentum
The momentum used for training.
typename Architecture_t::Scalar_t Scalar_t
TSGD(Scalar_t learningRate, DeepNet_t &deepNet, Scalar_t momentum)
Constructor.
std::vector< std::vector< Matrix_t > > & GetPastBiasGradients()
std::vector< std::vector< Matrix_t > > fPastBiasGradients
The sum of the past bias gradients associated with the deep net.
std::vector< std::vector< Matrix_t > > & GetPastWeightGradients()
void UpdateBiases(size_t layerIndex, std::vector< Matrix_t > &biases, const std::vector< Matrix_t > &biasGradients)
Update the biases, given the current bias gradients.
std::vector< Matrix_t > & GetPastWeightGradientsAt(size_t i)
std::vector< Matrix_t > & GetPastBiasGradientsAt(size_t i)
std::vector< std::vector< Matrix_t > > fPastWeightGradients
The sum of the past weight gradients associated with the deep net.
typename Architecture_t::Matrix_t Matrix_t
Scalar_t GetMomentum() const
Getters.
std::vector< Layer_t * > & GetLayers()
create variable transformations