template<typename Architecture_t>
class TLayer
{
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

   // ... constructors and accessors; see the member reference below ...

   void Backward(Matrix_t & gradients_backward,
                 const Matrix_t & activations_backward,
                 ERegularization r,
                 Scalar_t weightDecay);
};

template<typename Architecture_t>
class TSharedLayer
{
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

   // ... constructors and accessors; see the member reference below ...

   void Backward(Matrix_t & gradients_backward,
                 const Matrix_t & activations_backward,
                 ERegularization r,
                 Scalar_t weightDecay);
};

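Both classes are parametrized by a low-level architecture backend. The following compile-only sketch, inferred from the calls made in this header, shows the interface such a backend is expected to expose; the struct name and its toy Matrix_t are illustrative assumptions, not an actual ROOT backend.

#include <cstddef>
#include <vector>

struct SketchBackend {
   using Scalar_t = double;
   struct Matrix_t {
      std::size_t fNRows = 0, fNCols = 0;
      std::vector<Scalar_t> fData;
      Matrix_t(std::size_t rows, std::size_t cols)
         : fNRows(rows), fNCols(cols), fData(rows * cols) {}
      std::size_t GetNrows() const { return fNRows; }
   };
   using Tensor_t = Matrix_t;   // toy: rank-2 tensors only

   // Kernels this header calls on the backend:
   static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input,
                                 const Matrix_t &weights);
   static void AddRowWise(Matrix_t &output, const Matrix_t &biases);
   static void DropoutForward(Matrix_t &input, Scalar_t p);
   static void Hadamard(Tensor_t &A, const Tensor_t &B);
   static void Backward(Tensor_t &gradientsBackward, Matrix_t &weightGradients,
                        Matrix_t &biasGradients, const Tensor_t &df,
                        const Tensor_t &activationGradients, const Matrix_t &weights,
                        const Tensor_t &activationsBackward);
   static void Copy(Matrix_t &dst, const Matrix_t &src);
};
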
template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(size_t batchSize,
                               size_t inputWidth,
                               size_t width,
                               EActivationFunction f,
                               Scalar_t dropoutProbability)
   : fBatchSize(batchSize), fInputWidth(inputWidth), fWidth(width),
     fDropoutProbability(dropoutProbability), fWeights(width, fInputWidth),
     fBiases(width, 1), fOutput(fBatchSize, width), fDerivatives(fBatchSize, width),
     fWeightGradients(width, fInputWidth), fBiasGradients(width, 1),
     fActivationGradients(fBatchSize, width), fF(f)
{
   // Nothing to do here.
}

template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(const TLayer &layer)
   : fBatchSize(layer.fBatchSize), fInputWidth(layer.fInputWidth),
     fWidth(layer.fWidth), fDropoutProbability(layer.fDropoutProbability),
     fWeights(layer.fWidth, layer.fInputWidth), fBiases(layer.fWidth, 1),
     fOutput(layer.fBatchSize, layer.fWidth),
     fDerivatives(layer.fBatchSize, layer.fWidth),
     fWeightGradients(layer.fWidth, layer.fInputWidth),
     fBiasGradients(layer.fWidth, 1),
     fActivationGradients(layer.fBatchSize, layer.fWidth),
     fF(layer.fF)
{
   // Deep-copy the parameter values of the other layer.
   Architecture_t::Copy(fWeights, layer.GetWeights());
   Architecture_t::Copy(fBiases, layer.GetBiases());
}

template<typename Architecture_t>
void TLayer<Architecture_t>::Initialize(EInitialization m)
{
   initialize<Architecture_t>(fWeights, m);
   initialize<Architecture_t>(fBiases, EInitialization::kZero);
}

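For illustration, a self-contained toy version of Gaussian weight initialization, independent of the real initialize<Architecture_t> helper; the 1/sqrt(fan-in) scaling is an assumption here, not necessarily what a given EInitialization mode does.

#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

// Fill a flat weight buffer with N(0, 1/sqrt(inputWidth)) draws.
void gaussInitSketch(std::vector<double> &weights, std::size_t inputWidth)
{
   std::mt19937 gen{std::random_device{}()};
   std::normal_distribution<double> dist(0.0, 1.0 / std::sqrt(double(inputWidth)));
   for (auto &w : weights)
      w = dist(gen);
}
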
template<typename Architecture_t>
inline void TLayer<Architecture_t>::Forward(Matrix_t & input, bool applyDropout)
{
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::DropoutForward(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   Tensor_t tOutput(fOutput);
   Tensor_t tDerivatives(fDerivatives);
   evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
   evaluate<Architecture_t>(tOutput, fF);
}

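Independent of the backend, the computation Forward performs is output = f(input * W^T + b), with the bias broadcast across the rows of the batch. A minimal self-contained sketch with plain std::vector, assuming tanh as the activation (fF selects the actual function) and omitting dropout; note that fDropoutProbability is the probability that an input stays active.

#include <cmath>
#include <cstddef>
#include <vector>

// input: batchSize x inputWidth, weights: width x inputWidth, biases: width.
// Returns the activated output, batchSize x width.
std::vector<std::vector<double>>
forwardSketch(const std::vector<std::vector<double>> &input,
              const std::vector<std::vector<double>> &weights,
              const std::vector<double> &biases)
{
   std::vector<std::vector<double>> output(input.size(),
                                           std::vector<double>(weights.size()));
   for (std::size_t i = 0; i < input.size(); i++) {
      for (std::size_t j = 0; j < weights.size(); j++) {
         double z = biases[j];                        // AddRowWise
         for (std::size_t k = 0; k < input[i].size(); k++)
            z += input[i][k] * weights[j][k];         // MultiplyTranspose
         output[i][j] = std::tanh(z);                 // evaluate(tOutput, fF)
      }
   }
   return output;
}
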
template<typename Architecture_t>
inline void TLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
                                             const Matrix_t & activations_backward,
                                             ERegularization r,
                                             Scalar_t weightDecay)
{
   Tensor_t tGradBw(gradients_backward);
   Tensor_t tActBw(activations_backward);
   Tensor_t tActGrad(fActivationGradients);
   Tensor_t tDeriv(fDerivatives);

   Architecture_t::Hadamard(tDeriv, tActGrad);
   Architecture_t::Backward(tGradBw, fWeightGradients, fBiasGradients,
                            tDeriv, tActGrad, fWeights, tActBw);
   addRegularizationGradients<Architecture_t>(fWeightGradients, fWeights,
                                              weightDecay, r);
}

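The heavy lifting is delegated to Architecture_t::Backward. For reference, a self-contained sketch of the standard fully connected backpropagation it implements, where df denotes the elementwise (Hadamard) product of the activation derivatives with the incoming activation gradients, as computed above: the weight gradients are df^T * activations_backward, the bias gradients are the column sums of df, and the gradients passed to the previous layer are df * W.

#include <cstddef>
#include <vector>

using Mat = std::vector<std::vector<double>>;

// df:      batchSize x width (activation derivatives x incoming gradients)
// actBw:   batchSize x inputWidth (activations of the previous layer)
// weights: width x inputWidth
void backwardSketch(const Mat &df, const Mat &actBw, const Mat &weights,
                    Mat &weightGrads,               // width x inputWidth
                    std::vector<double> &biasGrads, // width
                    Mat &gradsBw)                   // batchSize x inputWidth
{
   const std::size_t batch = df.size(), width = weights.size(),
                     inW = weights[0].size();
   for (std::size_t j = 0; j < width; j++) {
      biasGrads[j] = 0.0;
      for (std::size_t i = 0; i < batch; i++)
         biasGrads[j] += df[i][j];                        // column sums of df
      for (std::size_t k = 0; k < inW; k++) {
         weightGrads[j][k] = 0.0;
         for (std::size_t i = 0; i < batch; i++)
            weightGrads[j][k] += df[i][j] * actBw[i][k];  // df^T * actBw
      }
   }
   for (std::size_t i = 0; i < batch; i++)
      for (std::size_t k = 0; k < inW; k++) {
         gradsBw[i][k] = 0.0;
         for (std::size_t j = 0; j < width; j++)
            gradsBw[i][k] += df[i][j] * weights[j][k];    // df * W
      }
}
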
template<typename Architecture_t>
void TLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}

template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(size_t BatchSize,
                                           TLayer<Architecture_t> &layer)
   : fBatchSize(BatchSize),
     fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
     fDropoutProbability(layer.GetDropoutProbability()),
     fWeights(layer.GetWeights()), fBiases(layer.GetBiases()),
     fOutput(fBatchSize, fWidth), fDerivatives(fBatchSize, fWidth),
     fWeightGradients(fWidth, fInputWidth), fBiasGradients(fWidth, 1),
     fActivationGradients(fBatchSize, fWidth), fF(layer.GetActivationFunction())
{
   // Nothing to do here.
}

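Since fWeights and fBiases are reference members, a TSharedLayer aliases the master layer's parameters rather than copying them, which is what makes it suitable for sharing one set of weights across worker threads. A usage sketch, assuming a ROOT build with the reference backend TMVA::DNN::TReference:

#include "TMVA/DNN/Architectures/Reference.h"
#include "TMVA/DNN/Layer.h"

using Arch_t = TMVA::DNN::TReference<double>;

void shareExample()
{
   TMVA::DNN::TLayer<Arch_t> master(/*BatchSize=*/32, /*InputWidth=*/10,
                                    /*Width=*/5,
                                    TMVA::DNN::EActivationFunction::kTanh,
                                    /*dropoutProbability=*/1.0);
   // The shared layer aliases master's weight and bias matrices through its
   // Matrix_t & members, so parameter updates on master are visible here.
   TMVA::DNN::TSharedLayer<Arch_t> worker(/*BatchSize=*/32, master);
}
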
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(const TSharedLayer &layer)
   : fBatchSize(layer.fBatchSize),
     fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
     fDropoutProbability(layer.fDropoutProbability), fWeights(layer.fWeights),
     fBiases(layer.fBiases), fOutput(layer.fBatchSize, fWidth),
     fDerivatives(layer.fBatchSize, fWidth), fWeightGradients(fWidth, fInputWidth),
     fBiasGradients(fWidth, 1), fActivationGradients(layer.fBatchSize, fWidth),
     fF(layer.fF)
{
   // Nothing to do here.
}

template<typename Architecture_t>
inline void TSharedLayer<Architecture_t>::Forward(Matrix_t & input, bool applyDropout)
{
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::DropoutForward(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   Tensor_t tOutput(fOutput);
   Tensor_t tDerivatives(fDerivatives);
   evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
   evaluate<Architecture_t>(tOutput, fF);
}

template<typename Architecture_t>
inline void TSharedLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
                                                   const Matrix_t & activations_backward,
                                                   ERegularization r,
                                                   Scalar_t weightDecay)
{
   Architecture_t::Backward(gradients_backward, fWeightGradients, fBiasGradients,
                            fDerivatives, fActivationGradients,
                            fWeights, activations_backward);
   addRegularizationGradients<Architecture_t>(fWeightGradients, fWeights,
                                              weightDecay, r);
}

template<typename Architecture_t>
void TSharedLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}

TLayer< Architecture_t > member reference:
void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward, ERegularization r, Scalar_t weightDecay)
Compute weight, bias and activation gradients.
const Matrix_t & GetBiasGradients() const
void SetDropoutProbability(Scalar_t p)
const Matrix_t & GetActivationGradients() const
EActivationFunction fF
Activation function of the layer.
Matrix_t fActivationGradients
Gradients w.r.t. the activations of this layer.
TLayer(size_t BatchSize, size_t InputWidth, size_t Width, EActivationFunction f, Scalar_t dropoutProbability)
size_t fInputWidth
Number of neurons of the previous layer.
Matrix_t fBiasGradients
Gradients w.r.t. the bias values of this layer.
const Matrix_t & GetBiases() const
const Matrix_t & GetOutput() const
Matrix_t fOutput
Activations of this layer.
EActivationFunction GetActivationFunction() const
const Matrix_t & GetWeightGradients() const
size_t GetBatchSize() const
size_t GetInputWidth() const
Matrix_t & GetBiasGradients()
Scalar_t fDropoutProbability
Probability that an input is active.
Matrix_t fBiases
The bias values of this layer.
Matrix_t & GetActivationGradients()
size_t fWidth
Number of neurons of this layer.
typename Architecture_t::Matrix_t Matrix_t
typename Architecture_t::Scalar_t Scalar_t
Scalar_t GetDropoutProbability() const
typename Architecture_t::Tensor_t Tensor_t
Matrix_t fWeightGradients
Gradients w.r.t. the weights of this layer.
size_t fBatchSize
Batch size used for training and evaluation.
void Initialize(EInitialization m)
Initialize fWeights according to the given initialization method.
Matrix_t fWeights
The weights of this layer.
Matrix_t fDerivatives
First derivatives of the activations of this layer.
Matrix_t & GetWeightGradients()
const Matrix_t & GetWeights() const
void Forward(Matrix_t &input, bool applyDropout=false)
Compute activation of the layer for the given input.
TSharedLayer< Architecture_t > member reference (a layer whose weight and bias matrices are shared with another layer):
Matrix_t & fBiases
Reference to the bias vectors of this layer.
Matrix_t fOutput
Activations of this layer.
TSharedLayer(size_t BatchSize, TLayer< Architecture_t > &layer)
Matrix_t & fWeights
Reference to the weight matrix of this layer.
Matrix_t & GetActivationGradients()
const Matrix_t & GetBiasGradients() const
Matrix_t fBiasGradients
Gradients w.r.t. the bias values of this layer.
Matrix_t & GetWeightGradients()
typename Architecture_t::Tensor_t Tensor_t
Matrix_t fDerivatives
First derivatives of the activations of this layer.
const Matrix_t & GetBiases() const
typename Architecture_t::Matrix_t Matrix_t
size_t fInputWidth
Number of neurons of the previous layer.
size_t fWidth
Number of neurons of this layer.
const Matrix_t & GetWeightGradients() const
size_t fBatchSize
Batch size used for training and evaluation.
Matrix_t fWeightGradients
Gradients w.r.t. the weights of this layer.
EActivationFunction GetActivationFunction() const
Matrix_t fActivationGradients
Gradients w.r.t. the activations of this layer.
Scalar_t GetDropoutProbability() const
size_t GetInputWidth() const
size_t GetBatchSize() const
void Forward(Matrix_t &input, bool applyDropout=false)
Compute activation of the layer for the given input.
typename Architecture_t::Scalar_t Scalar_t
EActivationFunction fF
Activation function of the layer.
Scalar_t fDropoutProbability
Probability that an input is active.
Matrix_t & GetWeights() const
const Matrix_t & GetOutput() const
const Matrix_t & GetActivationGradients() const
Matrix_t & GetBiasGradients()
void SetDropoutProbability(Scalar_t p)
void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward, ERegularization r, Scalar_t weightDecay)
Compute weight, bias and activation gradients.
Related free functions and enums:
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
Compute the weight decay term for regularization (L1 or L2).
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
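addRegularizationGradients, called at the end of both Backward implementations above, adds the derivative of the regularization term to the weight gradients. A self-contained sketch assuming the conventional L1/L2 derivatives; the enum is a stand-in for ERegularization, and the exact constant factors (e.g. the 2 on the L2 term) are implementation conventions.

#include <cstddef>
#include <vector>

// Stand-in for TMVA::DNN::ERegularization.
enum class RegSketch { kNone, kL1, kL2 };

void addRegGradientsSketch(std::vector<double> &weightGrads,
                           const std::vector<double> &weights,
                           double weightDecay, RegSketch r)
{
   for (std::size_t i = 0; i < weights.size(); i++) {
      if (r == RegSketch::kL1)        // d|w|/dw = sign(w)
         weightGrads[i] += weightDecay * (weights[i] >= 0.0 ? 1.0 : -1.0);
      else if (r == RegSketch::kL2)   // d(w*w)/dw = 2w
         weightGrads[i] += 2.0 * weightDecay * weights[i];
   }
}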