51template<
typename Architecture_t>
56 using Scalar_t =
typename Architecture_t::Scalar_t;
57 using Matrix_t =
typename Architecture_t::Matrix_t;
58 using Tensor_t =
typename Architecture_t::Tensor_t;
102 const Matrix_t & activations_backward,
146template<
typename Architecture_t>
152 using Scalar_t =
typename Architecture_t::Scalar_t;
153 using Matrix_t =
typename Architecture_t::Matrix_t;
154 using Tensor_t =
typename Architecture_t::Tensor_t;
192 const Matrix_t & activations_backward,
226template<
typename Architecture_t>
242template<
typename Architecture_t>
259template<
typename Architecture_t>
268template<
typename Architecture_t>
286template<
typename Architecture_t>
288 const Matrix_t & activations_backward,
294 Tensor_t tGradBw(gradients_backward);
295 Tensor_t tActBw(activations_backward);
299 Architecture_t::Hadamard( tDeriv, tActGrad);
300 Architecture_t::Backward( tGradBw,
313template<
typename Architecture_t>
316 std::cout <<
"Width = " <<
fWeights.GetNrows();
317 std::cout <<
", Activation Function = ";
318 std::cout << static_cast<int>(
fF) << std::endl;
327template<
typename Architecture_t>
342template<
typename Architecture_t>
355template<
typename Architecture_t>
372template<
typename Architecture_t>
374 const Matrix_t & activations_backward,
379 Architecture_t::Backward(gradients_backward,
385 activations_backward);
392template<
typename Architecture_t>
395 std::cout <<
"Width = " <<
fWeights.GetNrows();
396 std::cout <<
", Activation Function = ";
397 std::cout << static_cast<int>(
fF) << std::endl;
double evaluate() const override
void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward, ERegularization r, Scalar_t weightDecay)
Compute weight, bias and activation gradients.
const Matrix_t & GetBiasGradients() const
void SetDropoutProbability(Scalar_t p)
const Matrix_t & GetActivationGradients() const
EActivationFunction fF
Activation function of the layer.
Matrix_t fActivationGradients
Gradients w.r.t. the activations of this layer.
TLayer(size_t BatchSize, size_t InputWidth, size_t Width, EActivationFunction f, Scalar_t dropoutProbability)
size_t fInputWidth
Number of neurons of the previous layer.
Matrix_t fBiasGradients
Gradients w.r.t. the bias values of this layer.
const Matrix_t & GetBiases() const
const Matrix_t & GetOutput() const
Matrix_t fOutput
Activations of this layer.
EActivationFunction GetActivationFunction() const
const Matrix_t & GetWeightGradients() const
size_t GetBatchSize() const
size_t GetInputWidth() const
Matrix_t & GetBiasGradients()
Scalar_t fDropoutProbability
Probability that an input is active.
Matrix_t fBiases
The bias values of this layer.
Matrix_t & GetActivationGradients()
size_t fWidth
Number of neurons of this layer.
typename Architecture_t::Matrix_t Matrix_t
typename Architecture_t::Scalar_t Scalar_t
size_t GetDropoutProbability() const
typename Architecture_t::Tensor_t Tensor_t
Matrix_t fWeightGradients
Gradients w.r.t. the weights of this layer.
size_t fBatchSize
Batch size used for training and evaluation.
void Initialize(EInitialization m)
Initialize fWeights according to the given initialization method.
Matrix_t fWeights
The weights of this layer.
Matrix_t fDerivatives
First derivatives of the activations of this layer.
Matrix_t & GetWeightGradients()
const Matrix_t & GetWeights() const
void Forward(Matrix_t &input, bool applyDropout=false)
Compute activation of the layer for the given input.
Matrix_t & fBiases
Reference to the bias vectors of this layer.
Matrix_t fOutput
Activations of this layer.
TSharedLayer(size_t fBatchSize, TLayer< Architecture_t > &layer)
Matrix_t & fWeights
Reference to the weight matrix of this layer.
Matrix_t & GetActivationGradients()
const Matrix_t & GetBiasGradients() const
Matrix_t fBiasGradients
Gradients w.r.t. the bias values of this layer.
Matrix_t & GetWeightGradients()
typename Architecture_t::Tensor_t Tensor_t
Matrix_t fDerivatives
First derivatives of the activations of this layer.
const Matrix_t & GetBiases() const
typename Architecture_t::Matrix_t Matrix_t
size_t fInputWidth
Number of neurons of the previous layer.
size_t fWidth
Number of neurons of this layer.
const Matrix_t & GetWeightGradients() const
size_t fBatchSize
Batch size used for training and evaluation.
Matrix_t fWeightGradients
Gradients w.r.t. the weights of this layer.
EActivationFunction GetActivationFunction() const
Matrix_t fActivationGradients
Gradients w.r.t. the activations of this layer.
size_t GetDropoutProbability() const
size_t GetInputWidth() const
size_t GetBatchSize() const
void Forward(Matrix_t &input, bool applyDropout=false)
Compute activation of the layer for the given input.
typename Architecture_t::Scalar_t Scalar_t
EActivationFunction fF
Activation function of the layer.
Scalar_t fDropoutProbability
Probability that an input is active.
Matrix_t & GetWeights() const
const Matrix_t & GetOutput() const
const Matrix_t & GetActivationGradients() const
Matrix_t & GetBiasGradients()
void SetDropoutProbability(Scalar_t p)
void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward, ERegularization r, Scalar_t weightDecay)
Compute weight, bias and activation gradients.
void addRegularizationGradients(typename Architecture_t::Matrix_t &A, const typename Architecture_t::Matrix_t &W, typename Architecture_t::Scalar_t weightDecay, ERegularization R)
Add the regularization gradient corresponding to weight matrix W, to the matrix A.
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
void evaluateDerivative(typename Architecture_t::Tensor_t &B, EActivationFunction f, const typename Architecture_t::Tensor_t &A)
Compute the first partial derivative of the activation function for the values given in tensor A and write the results into tensor B.
create variable transformations