#ifndef TMVA_DNN_ADADELTA
#define TMVA_DNN_ADADELTA
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>,
          typename DeepNet_t = TDeepNet<Architecture_t, Layer_t>>
using Matrix_t = typename Architecture_t::Matrix_t;
using Scalar_t = typename Architecture_t::Scalar_t;
template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
std::vector<Layer_t *> &layers = deepNet.GetLayers();
Architecture_t::CreateWeightTensors(fWorkBiasTensor1[i], layers[i]->GetBiases());
Architecture_t::CreateWeightTensors(fWorkBiasTensor2[i], layers[i]->GetBiases());
template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
Architecture_t::ConstAdd(dummy1, this->GetEpsilon());
Architecture_t::SqrtElementWise(dummy1);
template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
Architecture_t::ConstAdd(dummy1, this->GetEpsilon());
Architecture_t::SqrtElementWise(dummy1);
size_t size(const MatrixT &matrix)
Retrieve the size of a square matrix.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Adadelta Optimizer class.
std::vector< std::vector< Matrix_t > > fWorkBiasTensor2
working tensor used to keep a temporary copy of bias or bias gradients
Scalar_t GetRho() const
Getters.
Scalar_t fEpsilon
The smoothing term used to avoid division by zero.
std::vector< std::vector< Matrix_t > > & GetPastSquaredWeightUpdates()
void UpdateWeights(size_t layerIndex, std::vector< Matrix_t > &weights, const std::vector< Matrix_t > &weightGradients)
Update the weights, given the current weight gradients.
std::vector< Matrix_t > & GetPastSquaredBiasGradientsAt(size_t i)
std::vector< Matrix_t > & GetPastSquaredWeightGradientsAt(size_t i)
Scalar_t fRho
The Rho constant used by the optimizer.
std::vector< std::vector< Matrix_t > > & GetPastSquaredBiasUpdates()
std::vector< Matrix_t > & GetPastSquaredWeightUpdatesAt(size_t i)
std::vector< std::vector< Matrix_t > > fWorkBiasTensor1
working tensor used to keep a temporary copy of bias or bias gradients
std::vector< std::vector< Matrix_t > > fPastSquaredBiasUpdates
The accumulation of the square of the past bias updates associated with the deep net.
void UpdateBiases(size_t layerIndex, std::vector< Matrix_t > &biases, const std::vector< Matrix_t > &biasGradients)
Update the biases, given the current bias gradients.
TAdadelta(DeepNet_t &deepNet, Scalar_t learningRate=1.0, Scalar_t rho=0.95, Scalar_t epsilon=1e-8)
Constructor.
Scalar_t GetEpsilon() const
std::vector< std::vector< Matrix_t > > & GetPastSquaredBiasGradients()
std::vector< std::vector< Matrix_t > > fWorkWeightTensor2
working tensor used to keep a temporary copy of weights or weight gradients
~TAdadelta()=default
Destructor.
std::vector< std::vector< Matrix_t > > fPastSquaredWeightUpdates
The accumulation of the square of the past weight updates associated with the deep net.
std::vector< std::vector< Matrix_t > > & GetPastSquaredWeightGradients()
typename Architecture_t::Scalar_t Scalar_t
typename Architecture_t::Matrix_t Matrix_t
std::vector< std::vector< Matrix_t > > fPastSquaredWeightGradients
The accumulation of the square of the past weight gradients associated with the deep net.
std::vector< std::vector< Matrix_t > > fWorkWeightTensor1
working tensor used to keep a temporary copy of weights or weight gradients
std::vector< std::vector< Matrix_t > > fPastSquaredBiasGradients
The accumulation of the square of the past bias gradients associated with the deep net.
std::vector< Matrix_t > & GetPastSquaredBiasUpdatesAt(size_t i)
Create variable transformations.