#ifndef TMVA_DNN_RMSPROP
#define TMVA_DNN_RMSPROP

#include "TMatrix.h"
#include "TMVA/DNN/Optimizer.h"
#include "TMVA/DNN/Functions.h"

namespace TMVA {
namespace DNN {

/** \class TRMSProp
 *  RMSProp Optimizer class
 *
 *  This class represents the RMSProp Optimizer with an optional momentum term.
 */
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>,
          typename DeepNet_t = TDeepNet<Architecture_t, Layer_t>>
class TRMSProp : public VOptimizer<Architecture_t, Layer_t, DeepNet_t> {
public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

protected:
   Scalar_t fMomentum; ///< The momentum used for training.
   Scalar_t fRho;      ///< The Rho constant used by the optimizer.
   Scalar_t fEpsilon;  ///< The smoothing term used to avoid division by zero.

   std::vector<std::vector<Matrix_t>> fPastSquaredWeightGradients; ///< The sum of the square of the past weight gradients associated with the deep net.
   std::vector<std::vector<Matrix_t>> fPastSquaredBiasGradients;   ///< The sum of the square of the past bias gradients associated with the deep net.
   std::vector<std::vector<Matrix_t>> fWeightUpdates; ///< The accumulation of the past weight updates.
   std::vector<std::vector<Matrix_t>> fBiasUpdates;   ///< The accumulation of the past bias updates.

   /*! Update the weights, given the current weight gradients. */
   void UpdateWeights(size_t layerIndex, std::vector<Matrix_t> &weights,
                      const std::vector<Matrix_t> &weightGradients);

   /*! Update the biases, given the current bias gradients. */
   void UpdateBiases(size_t layerIndex, std::vector<Matrix_t> &biases,
                     const std::vector<Matrix_t> &biasGradients);

public:
   /*! Constructor. */
   TRMSProp(DeepNet_t &deepNet, Scalar_t learningRate = 0.001, Scalar_t momentum = 0.0,
            Scalar_t rho = 0.9, Scalar_t epsilon = 1e-7);

   /*! Destructor. */
   ~TRMSProp() = default;

   /*! Getters */
   Scalar_t GetMomentum() const { return fMomentum; }
   Scalar_t GetRho() const { return fRho; }
   Scalar_t GetEpsilon() const { return fEpsilon; }

   std::vector<Matrix_t> &GetPastSquaredWeightGradientsAt(size_t i) { return fPastSquaredWeightGradients[i]; }
   std::vector<Matrix_t> &GetPastSquaredBiasGradientsAt(size_t i) { return fPastSquaredBiasGradients[i]; }
   std::vector<Matrix_t> &GetWeightUpdatesAt(size_t i) { return fWeightUpdates[i]; }
   std::vector<Matrix_t> &GetBiasUpdatesAt(size_t i) { return fBiasUpdates[i]; }
};
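// The update rule implemented below, written out once for reference (g_t is the current
// gradient; rho, epsilon, momentum and the learning rate alpha correspond to fRho,
// fEpsilon, fMomentum and this->GetLearningRate()):
//
//    V_t     = rho * V_{t-1} + (1 - rho) * g_t^2        // running average of squared gradients
//    W_t     = momentum * W_{t-1} + alpha * g_t / sqrt(V_t + epsilon)
//    theta_t = theta_{t-1} - W_t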
//
// The RMSProp Optimizer Class - Implementation
//_________________________________________________________________________________________________
template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
TRMSProp<Architecture_t, Layer_t, DeepNet_t>::TRMSProp(DeepNet_t &deepNet, Scalar_t learningRate, Scalar_t momentum,
                                                       Scalar_t rho, Scalar_t epsilon)
   : VOptimizer<Architecture_t, Layer_t, DeepNet_t>(learningRate, deepNet), fMomentum(momentum), fRho(rho),
     fEpsilon(epsilon)
{
   std::vector<Layer_t *> &layers = deepNet.GetLayers();
   const size_t layersNSlices = layers.size();
   fPastSquaredWeightGradients.resize(layersNSlices);
   fPastSquaredBiasGradients.resize(layersNSlices);
   fWeightUpdates.resize(layersNSlices);
   fBiasUpdates.resize(layersNSlices);

   for (size_t i = 0; i < layersNSlices; i++) {
      // One zero-initialized state matrix per weight matrix of the layer.
      const size_t weightsNSlices = (layers[i]->GetWeights()).size();
      for (size_t j = 0; j < weightsNSlices; j++) {
         Matrix_t &currentWeights = layers[i]->GetWeightsAt(j);
         const size_t weightsNRows = currentWeights.GetNrows();
         const size_t weightsNCols = currentWeights.GetNcols();
         fPastSquaredWeightGradients[i].emplace_back(weightsNRows, weightsNCols);
         fWeightUpdates[i].emplace_back(weightsNRows, weightsNCols);
         initialize<Architecture_t>(fPastSquaredWeightGradients[i][j], EInitialization::kZero);
         initialize<Architecture_t>(fWeightUpdates[i][j], EInitialization::kZero);
      }

      // Likewise, one zero-initialized state matrix per bias matrix.
      const size_t biasesNSlices = (layers[i]->GetBiases()).size();
      for (size_t j = 0; j < biasesNSlices; j++) {
         Matrix_t &currentBiases = layers[i]->GetBiasesAt(j);
         const size_t biasesNRows = currentBiases.GetNrows();
         const size_t biasesNCols = currentBiases.GetNcols();
         fPastSquaredBiasGradients[i].emplace_back(biasesNRows, biasesNCols);
         fBiasUpdates[i].emplace_back(biasesNRows, biasesNCols);
         initialize<Architecture_t>(fPastSquaredBiasGradients[i][j], EInitialization::kZero);
         initialize<Architecture_t>(fBiasUpdates[i][j], EInitialization::kZero);
      }
   }
}
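// A construction sketch, kept as a comment: it assumes a concrete backend such as
// TMVA::DNN::TCpu<Double_t> and a network `net` built elsewhere (both are assumptions
// for illustration, not prescribed by this header):
//
//    using Arch_t = TMVA::DNN::TCpu<Double_t>;
//    TDeepNet<Arch_t> net{/* ... layers configured elsewhere ... */};
//    TRMSProp<Arch_t> optimizer(net, /*learningRate=*/0.001, /*momentum=*/0.0,
//                               /*rho=*/0.9, /*epsilon=*/1e-7);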
//_________________________________________________________________________________________________
template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
auto TRMSProp<Architecture_t, Layer_t, DeepNet_t>::UpdateWeights(size_t layerIndex, std::vector<Matrix_t> &weights,
                                                                 const std::vector<Matrix_t> &weightGradients) -> void
{
   std::vector<Matrix_t> &currentLayerPastSquaredWeightGradients = this->GetPastSquaredWeightGradientsAt(layerIndex);
   std::vector<Matrix_t> &currentLayerWeightUpdates = this->GetWeightUpdatesAt(layerIndex);

   for (size_t k = 0; k < currentLayerPastSquaredWeightGradients.size(); k++) {

      // accumulation matrix, used as temporary storage for the running quantities
      Matrix_t accumulation(currentLayerPastSquaredWeightGradients[k].GetNrows(),
                            currentLayerPastSquaredWeightGradients[k].GetNcols());

      // V_t = rho * V_{t-1} + (1 - rho) * g_t^2
      initialize<Architecture_t>(accumulation, EInitialization::kZero);
      Matrix_t currentSquaredWeightGradients(weightGradients[k].GetNrows(), weightGradients[k].GetNcols());
      Architecture_t::Copy(currentSquaredWeightGradients, weightGradients[k]);
      Architecture_t::SquareElementWise(currentSquaredWeightGradients);
      Architecture_t::ScaleAdd(accumulation, currentLayerPastSquaredWeightGradients[k], this->GetRho());
      Architecture_t::ScaleAdd(accumulation, currentSquaredWeightGradients, 1 - (this->GetRho()));
      Architecture_t::Copy(currentLayerPastSquaredWeightGradients[k], accumulation);
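      // Worked numbers for this step (illustrative only): with rho = 0.9,
      // V_{t-1} = 0.040 and g_t = 0.3, the new average is
      //    V_t = 0.9 * 0.040 + 0.1 * 0.09 = 0.045,
      // so a persistently large gradient inflates V_t and shrinks the effective step size.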
      // W_t = momentum * W_{t-1} + alpha * g_t / sqrt(V_t + epsilon)
      Matrix_t dummy(currentLayerPastSquaredWeightGradients[k].GetNrows(),
                     currentLayerPastSquaredWeightGradients[k].GetNcols());
      Architecture_t::Copy(dummy, currentLayerPastSquaredWeightGradients[k]);
      Architecture_t::ConstAdd(dummy, this->GetEpsilon());
      Architecture_t::SqrtElementWise(dummy);
      Architecture_t::ReciprocalElementWise(dummy);
      Architecture_t::Hadamard(dummy, weightGradients[k]); // dummy = g_t / sqrt(V_t + epsilon)

      initialize<Architecture_t>(accumulation, EInitialization::kZero);
      Architecture_t::ScaleAdd(accumulation, currentLayerWeightUpdates[k], this->GetMomentum());
      Architecture_t::ScaleAdd(accumulation, dummy, this->GetLearningRate());
      Architecture_t::Copy(currentLayerWeightUpdates[k], accumulation);
   }

   // theta_t = theta_{t-1} - W_t
   for (size_t i = 0; i < weights.size(); i++) {
      Architecture_t::ScaleAdd(weights[i], currentLayerWeightUpdates[i], -1.0);
   }
}
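// The same arithmetic on a single scalar parameter, as a self-contained sketch that compiles
// outside of TMVA (the struct and function names here are hypothetical, chosen only to mirror
// the matrix code above):
//
//    #include <cmath>
//
//    struct RMSPropState {
//       double v = 0.0; // running average of squared gradients, V_t
//       double w = 0.0; // previous update, W_t
//    };
//
//    // One RMSProp-with-momentum step on a scalar parameter theta.
//    inline double RMSPropStep(double theta, double grad, RMSPropState &s, double lr = 0.001,
//                              double momentum = 0.0, double rho = 0.9, double eps = 1e-7)
//    {
//       s.v = rho * s.v + (1 - rho) * grad * grad;               // V_t
//       s.w = momentum * s.w + lr * grad / std::sqrt(s.v + eps); // W_t
//       return theta - s.w;                                      // theta_t
//    }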
//_________________________________________________________________________________________________
template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
auto TRMSProp<Architecture_t, Layer_t, DeepNet_t>::UpdateBiases(size_t layerIndex, std::vector<Matrix_t> &biases,
                                                                const std::vector<Matrix_t> &biasGradients) -> void
{
   // Analogous to UpdateWeights(), applied to the bias matrices of the layer.
   std::vector<Matrix_t> &currentLayerPastSquaredBiasGradients = this->GetPastSquaredBiasGradientsAt(layerIndex);
   std::vector<Matrix_t> &currentLayerBiasUpdates = this->GetBiasUpdatesAt(layerIndex);

   for (size_t k = 0; k < currentLayerPastSquaredBiasGradients.size(); k++) {

      // accumulation matrix, used as temporary storage for the running quantities
      Matrix_t accumulation(currentLayerPastSquaredBiasGradients[k].GetNrows(),
                            currentLayerPastSquaredBiasGradients[k].GetNcols());

      // V_t = rho * V_{t-1} + (1 - rho) * g_t^2
      initialize<Architecture_t>(accumulation, EInitialization::kZero);
      Matrix_t currentSquaredBiasGradients(biasGradients[k].GetNrows(), biasGradients[k].GetNcols());
      Architecture_t::Copy(currentSquaredBiasGradients, biasGradients[k]);
      Architecture_t::SquareElementWise(currentSquaredBiasGradients);
      Architecture_t::ScaleAdd(accumulation, currentLayerPastSquaredBiasGradients[k], this->GetRho());
      Architecture_t::ScaleAdd(accumulation, currentSquaredBiasGradients, 1 - (this->GetRho()));
      Architecture_t::Copy(currentLayerPastSquaredBiasGradients[k], accumulation);

      // W_t = momentum * W_{t-1} + alpha * g_t / sqrt(V_t + epsilon)
      Matrix_t dummy(currentLayerPastSquaredBiasGradients[k].GetNrows(),
                     currentLayerPastSquaredBiasGradients[k].GetNcols());
      Architecture_t::Copy(dummy, currentLayerPastSquaredBiasGradients[k]);
      Architecture_t::ConstAdd(dummy, this->GetEpsilon());
      Architecture_t::SqrtElementWise(dummy);
      Architecture_t::ReciprocalElementWise(dummy);
      Architecture_t::Hadamard(dummy, biasGradients[k]); // dummy = g_t / sqrt(V_t + epsilon)

      initialize<Architecture_t>(accumulation, EInitialization::kZero);
      Architecture_t::ScaleAdd(accumulation, currentLayerBiasUpdates[k], this->GetMomentum());
      Architecture_t::ScaleAdd(accumulation, dummy, this->GetLearningRate());
      Architecture_t::Copy(currentLayerBiasUpdates[k], accumulation);
   }

   // theta_t = theta_{t-1} - W_t
   for (size_t i = 0; i < biases.size(); i++) {
      Architecture_t::ScaleAdd(biases[i], currentLayerBiasUpdates[i], -1.0);
   }
}
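// During training the optimizer is driven through the VOptimizer base class rather than by
// calling UpdateWeights()/UpdateBiases() directly; a per-batch sketch (all of `optimizer`,
// `net`, `input`, `output` and `weights` are assumed to exist in the surrounding training
// loop, and the exact Forward/Backward signatures depend on the ROOT version):
//
//    net.Forward(input, /*applyDropout=*/true);
//    net.Backward(input, output, weights);
//    optimizer.Step(); // dispatches UpdateWeights()/UpdateBiases() for every layer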
} // namespace DNN
} // namespace TMVA

#endif // TMVA_DNN_RMSPROP