template<typename Architecture_t>
class TLayer
{
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

   // ... data members and remaining interface omitted here; see the
   // member reference further below ...

   void Backward(Matrix_t &gradients_backward,
                 const Matrix_t &activations_backward,
                 ERegularization r, Scalar_t weightDecay);
};
template<typename Architecture_t>
class TSharedLayer
{
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

   // ... data members and remaining interface omitted here; see the
   // member reference further below ...

   void Backward(Matrix_t &gradients_backward,
                 const Matrix_t &activations_backward,
                 ERegularization r, Scalar_t weightDecay);
};
template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(size_t batchSize, size_t inputWidth, size_t width,
                               EActivationFunction f, Scalar_t dropoutProbability)
   : fBatchSize(batchSize), fInputWidth(inputWidth), fWidth(width),
     fDropoutProbability(dropoutProbability), fWeights(width, fInputWidth),
     fBiases(width, 1), fOutput(fBatchSize, width), fDerivatives(fBatchSize, width),
     fWeightGradients(width, fInputWidth), fBiasGradients(width, 1),
     fActivationGradients(fBatchSize, width), fF(f)
{
   // Nothing to do here.
}
template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(const TLayer &layer)
   : fBatchSize(layer.fBatchSize), fInputWidth(layer.fInputWidth),
     fWidth(layer.fWidth), fDropoutProbability(layer.fDropoutProbability),
     fWeights(layer.fWidth, layer.fInputWidth), fBiases(layer.fWidth, 1),
     fOutput(layer.fBatchSize, layer.fWidth),
     fDerivatives(layer.fBatchSize, layer.fWidth),
     fWeightGradients(layer.fWidth, layer.fInputWidth),
     fBiasGradients(layer.fWidth, 1),
     fActivationGradients(layer.fBatchSize, layer.fWidth),
     fF(layer.fF)
{
   // The matrices above are allocated with matching dimensions; copy the
   // weight and bias values from the source layer.
   Architecture_t::Copy(fWeights, layer.GetWeights());
   Architecture_t::Copy(fBiases, layer.GetBiases());
}
template<typename Architecture_t>
void TLayer<Architecture_t>::Initialize(EInitialization m)
{
   initialize<Architecture_t>(fWeights, m);
   initialize<Architecture_t>(fBiases, EInitialization::kZero);
}
template<typename Architecture_t>
inline void TLayer<Architecture_t>::Forward(Matrix_t &input, bool applyDropout)
{
   // A dropout probability of 1.0 keeps every input, so nothing to do then.
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::DropoutForward(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   Tensor_t tOutput(fOutput);
   Tensor_t tDerivatives(fDerivatives);
   // Evaluate the activation derivatives while tOutput still holds the
   // pre-activations, then apply the activation function in place.
   evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
   evaluate<Architecture_t>(tOutput, fF);
}
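The forward pass thus computes input * fWeights^T, adds the biases row-wise, and applies the activation in place. A minimal usage sketch; the backend TMVA::DNN::TReference<double> and all dimensions here are illustrative assumptions, not part of the listing above:

#include "TMVA/DNN/Architectures/Reference.h"
#include "TMVA/DNN/Layer.h"

void ForwardExample()
{
   using Architecture_t = TMVA::DNN::TReference<double>;  // assumed backend
   using Matrix_t = Architecture_t::Matrix_t;

   // 32 events per batch, 16 input features, 8 neurons, ReLU activation;
   // a dropout probability of 1.0 keeps every input (dropout disabled).
   TMVA::DNN::TLayer<Architecture_t> layer(
       32, 16, 8, TMVA::DNN::EActivationFunction::kRelu, 1.0);
   layer.Initialize(TMVA::DNN::EInitialization::kGauss);

   Matrix_t input(32, 16);                      // one batch of inputs
   layer.Forward(input, /*applyDropout=*/false);
   const Matrix_t &output = layer.GetOutput();  // 32 x 8 activations
}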
template<typename Architecture_t>
inline void TLayer<Architecture_t>::Backward(Matrix_t &gradients_backward,
                                             const Matrix_t &activations_backward,
                                             ERegularization r, Scalar_t weightDecay)
{
   Tensor_t tGradBw(gradients_backward);
   Tensor_t tActBw(activations_backward);
   Tensor_t tActGrad(fActivationGradients);
   Tensor_t tDeriv(fDerivatives);

   // Combine incoming activation gradients with the stored derivatives.
   Architecture_t::Hadamard(tDeriv, tActGrad);
   Architecture_t::Backward(tGradBw, fWeightGradients, fBiasGradients,
                            tDeriv, tActBw, fWeights);
   addRegularizationGradients<Architecture_t>(fWeightGradients, fWeights,
                                              weightDecay, r);
}
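In conventional notation these steps are the standard dense-layer backpropagation rules. A sketch, writing X for activations_backward (the forward activations of the previous layer), A for the output of this layer, and delta for the result of the Hadamard product:

\[
\delta = f'(Z) \odot \frac{\partial L}{\partial A}, \qquad
\frac{\partial L}{\partial W} = \delta^{\top} X, \qquad
\frac{\partial L}{\partial b} = \sum_{\text{rows}} \delta, \qquad
\frac{\partial L}{\partial X} = \delta\, W,
\]

where Z = X W^T + b are the pre-activations whose derivatives f'(Z) were stored in fDerivatives during Forward, and the last term is returned in gradients_backward for the next layer down.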
template<typename Architecture_t>
void TLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(size_t BatchSize,
                                           TLayer<Architecture_t> &layer)
   : fBatchSize(BatchSize),
     fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
     fDropoutProbability(layer.GetDropoutProbability()),
     fWeights(layer.GetWeights()), fBiases(layer.GetBiases()),
     fOutput(fBatchSize, fWidth), fDerivatives(fBatchSize, fWidth),
     fWeightGradients(fWidth, fInputWidth), fBiasGradients(fWidth, 1),
     fActivationGradients(fBatchSize, fWidth), fF(layer.GetActivationFunction())
{
   // fWeights and fBiases are references: this layer shares the weight and
   // bias matrices of the given layer rather than owning copies.
}
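Because fWeights and fBiases are references, a TSharedLayer evaluates and accumulates gradients through matrices owned by another layer, which is what allows the same weights to be used with a different batch size. A minimal sketch; the backend and dimensions are again illustrative assumptions:

void SharedExample()
{
   using Architecture_t = TMVA::DNN::TReference<double>;  // assumed backend
   using Matrix_t = Architecture_t::Matrix_t;

   TMVA::DNN::TLayer<Architecture_t> owner(
       32, 16, 8, TMVA::DNN::EActivationFunction::kTanh, 1.0);
   owner.Initialize(TMVA::DNN::EInitialization::kGauss);

   // Same weights and biases, different batch size: the shared layer
   // allocates its own output and gradient matrices but references the
   // weight and bias matrices of `owner`.
   TMVA::DNN::TSharedLayer<Architecture_t> shared(64, owner);

   Matrix_t input(64, 16);
   shared.Forward(input);
}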
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(const TSharedLayer &layer)
   : fBatchSize(layer.fBatchSize),
     fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
     fDropoutProbability(layer.fDropoutProbability), fWeights(layer.fWeights),
     fBiases(layer.fBiases), fOutput(layer.fBatchSize, fWidth),
     fDerivatives(layer.fBatchSize, fWidth), fWeightGradients(fWidth, fInputWidth),
     fBiasGradients(fWidth, 1), fActivationGradients(layer.fBatchSize, fWidth),
     fF(layer.fF)
{
   // The copy shares the same weight and bias matrices as the original.
}
template<typename Architecture_t>
inline void TSharedLayer<Architecture_t>::Forward(Matrix_t &input, bool applyDropout)
{
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::DropoutForward(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   Tensor_t tOutput(fOutput);
   Tensor_t tDerivatives(fDerivatives);
   evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
   evaluate<Architecture_t>(tOutput, fF);
}
template<typename Architecture_t>
inline void TSharedLayer<Architecture_t>::Backward(Matrix_t &gradients_backward,
                                                   const Matrix_t &activations_backward,
                                                   ERegularization r, Scalar_t weightDecay)
{
   Architecture_t::Backward(gradients_backward, fWeightGradients, fBiasGradients,
                            fDerivatives, fActivationGradients, fWeights,
                            activations_backward);
   addRegularizationGradients<Architecture_t>(fWeightGradients, fWeights,
                                              weightDecay, r);
}
template<typename Architecture_t>
void TSharedLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}
TLayer<Architecture_t> member reference:

template<typename Architecture_t>
class TLayer
{
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:
   size_t fBatchSize;             ///< Batch size used for training and evaluation.
   size_t fInputWidth;            ///< Number of neurons of the previous layer.
   size_t fWidth;                 ///< Number of neurons of this layer.
   Scalar_t fDropoutProbability;  ///< Probability that an input is active.
   Matrix_t fWeights;             ///< The weights of this layer.
   Matrix_t fBiases;              ///< The bias values of this layer.
   Matrix_t fOutput;              ///< Activations of this layer.
   Matrix_t fDerivatives;         ///< First derivatives of the activations of this layer.
   Matrix_t fWeightGradients;     ///< Gradients w.r.t. the weights of this layer.
   Matrix_t fBiasGradients;       ///< Gradients w.r.t. the bias values of this layer.
   Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.
   EActivationFunction fF;        ///< Activation function of the layer.

public:
   TLayer(size_t BatchSize, size_t InputWidth, size_t Width,
          EActivationFunction f, Scalar_t dropoutProbability);

   /// Initialize fWeights according to the given initialization method.
   void Initialize(EInitialization m);
   /// Compute activation of the layer for the given input.
   void Forward(Matrix_t &input, bool applyDropout = false);
   /// Compute weight, bias and activation gradients.
   void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward,
                 ERegularization r, Scalar_t weightDecay);

   size_t GetBatchSize() const;
   size_t GetInputWidth() const;
   Scalar_t GetDropoutProbability() const;
   void SetDropoutProbability(Scalar_t p);
   EActivationFunction GetActivationFunction() const;
   const Matrix_t &GetWeights() const;
   const Matrix_t &GetBiases() const;
   const Matrix_t &GetOutput() const;
   Matrix_t &GetWeightGradients();
   const Matrix_t &GetWeightGradients() const;
   Matrix_t &GetBiasGradients();
   const Matrix_t &GetBiasGradients() const;
   Matrix_t &GetActivationGradients();
   const Matrix_t &GetActivationGradients() const;
};
TSharedLayer<Architecture_t> member reference (layer class with shared weight and bias matrices):

template<typename Architecture_t>
class TSharedLayer
{
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:
   size_t fBatchSize;             ///< Batch size used for training and evaluation.
   size_t fInputWidth;            ///< Number of neurons of the previous layer.
   size_t fWidth;                 ///< Number of neurons of this layer.
   Scalar_t fDropoutProbability;  ///< Probability that an input is active.
   Matrix_t &fWeights;            ///< Reference to the weight matrix of this layer.
   Matrix_t &fBiases;             ///< Reference to the bias vectors of this layer.
   Matrix_t fOutput;              ///< Activations of this layer.
   Matrix_t fDerivatives;         ///< First derivatives of the activations of this layer.
   Matrix_t fWeightGradients;     ///< Gradients w.r.t. the weights of this layer.
   Matrix_t fBiasGradients;       ///< Gradients w.r.t. the bias values of this layer.
   Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.
   EActivationFunction fF;        ///< Activation function of the layer.

public:
   TSharedLayer(size_t fBatchSize, TLayer<Architecture_t> &layer);

   /// Compute activation of the layer for the given input.
   void Forward(Matrix_t &input, bool applyDropout = false);
   /// Compute weight, bias and activation gradients.
   void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward,
                 ERegularization r, Scalar_t weightDecay);

   size_t GetBatchSize() const;
   size_t GetInputWidth() const;
   Scalar_t GetDropoutProbability() const;
   void SetDropoutProbability(Scalar_t p);
   EActivationFunction GetActivationFunction() const;
   Matrix_t &GetWeights() const;
   const Matrix_t &GetBiases() const;
   const Matrix_t &GetOutput() const;
   Matrix_t &GetWeightGradients();
   const Matrix_t &GetWeightGradients() const;
   Matrix_t &GetBiasGradients();
   const Matrix_t &GetBiasGradients() const;
   Matrix_t &GetActivationGradients();
   const Matrix_t &GetActivationGradients() const;
};
Related functions and enumerations:

   void Copy(void *source, void *dest);

   /// Compute the weight decay for regularization (L1 or L2); see the
   /// sketch below.
   double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd,
                      double factorWeightDecay, EnumRegularization eRegularization);

   enum class ERegularization;      ///< Regularization type applied for a given layer.
   enum class EActivationFunction;  ///< Layer activation functions.
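For reference, the two regularization types penalize the weights as follows; a sketch in the usual notation, with lambda the weight-decay factor (the exact normalization is backend-dependent):

\[
R_{L1}(W) = \lambda \sum_{i,j} \lvert W_{i,j} \rvert, \qquad
R_{L2}(W) = \lambda \sum_{i,j} W_{i,j}^{2},
\]

so addRegularizationGradients adds terms proportional to sign(W) for L1 and to W itself for L2, scaled by the weightDecay factor.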