51template<
typename Architecture_t>
56 using Scalar_t =
typename Architecture_t::Scalar_t;
57 using Matrix_t =
typename Architecture_t::Matrix_t;
100 const Matrix_t & activations_backward,
144template<
typename Architecture_t>
150 using Scalar_t =
typename Architecture_t::Scalar_t;
151 using Matrix_t =
typename Architecture_t::Matrix_t;
188 const Matrix_t & activations_backward,
222template<
typename Architecture_t>
228 : fBatchSize(batchSize), fInputWidth(inputWidth), fWidth(
width),
229 fDropoutProbability(dropoutProbability), fWeights(
width, fInputWidth),
230 fBiases(
width, 1), fOutput(fBatchSize,
width), fDerivatives(fBatchSize,
width),
231 fWeightGradients(
width, fInputWidth), fBiasGradients(
width, 1),
232 fActivationGradients(fBatchSize,
width), fF(
f)
238template<
typename Architecture_t>
240 : fBatchSize(layer.fBatchSize), fInputWidth(layer.fInputWidth),
241 fWidth(layer.fWidth), fDropoutProbability(layer.fDropoutProbability),
242 fWeights(layer.fWidth, layer.fInputWidth), fBiases(layer.fWidth, 1),
243 fOutput(layer.fBatchSize, layer.fWidth),
244 fDerivatives(layer.fBatchSize, layer.fWidth),
245 fWeightGradients(layer.fWidth, layer.fInputWidth),
246 fBiasGradients(layer.fWidth, 1),
247 fActivationGradients(layer.fBatchSize, layer.fWidth),
255template<
typename Architecture_t>
259 initialize<Architecture_t>(fWeights,
m);
264template<
typename Architecture_t>
269 if (applyDropout && (fDropoutProbability != 1.0)) {
270 Architecture_t::Dropout(input, fDropoutProbability);
272 Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
273 Architecture_t::AddRowWise(fOutput, fBiases);
274 evaluateDerivative<Architecture_t>(fDerivatives, fF, fOutput);
275 evaluate<Architecture_t>(fOutput, fF);
279template<
typename Architecture_t>
281 const Matrix_t & activations_backward,
286 Architecture_t::Backward(gradients_backward,
290 fActivationGradients,
292 activations_backward);
293 addRegularizationGradients<Architecture_t>(fWeightGradients,
299template<
typename Architecture_t>
302 std::cout <<
"Width = " << fWeights.GetNrows();
303 std::cout <<
", Activation Function = ";
304 std::cout << static_cast<int>(fF) << std::endl;
313template<
typename Architecture_t>
316: fBatchSize(BatchSize),
317fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
318fDropoutProbability(layer.GetDropoutProbability()),
319fWeights(layer.GetWeights()), fBiases(layer.GetBiases()),
320fOutput(fBatchSize, fWidth), fDerivatives(fBatchSize, fWidth),
321fWeightGradients(fWidth, fInputWidth), fBiasGradients(fWidth, 1),
322fActivationGradients(fBatchSize, fWidth), fF(layer.GetActivationFunction())
328template<
typename Architecture_t>
330 : fBatchSize(layer.fBatchSize),
331 fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
332 fDropoutProbability(layer.fDropoutProbability), fWeights(layer.fWeights),
333 fBiases(layer.fBiases), fOutput(layer.fBatchSize, fWidth),
334 fDerivatives(layer.fBatchSize, fWidth), fWeightGradients(fWidth, fInputWidth),
335 fBiasGradients(fWidth, 1), fActivationGradients(layer.fBatchSize, fWidth),
341template<
typename Architecture_t>
346 if (applyDropout && (fDropoutProbability != 1.0)) {
347 Architecture_t::Dropout(input, fDropoutProbability);
349 Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
350 Architecture_t::AddRowWise(fOutput, fBiases);
351 evaluateDerivative<Architecture_t>(fDerivatives, fF, fOutput);
352 evaluate<Architecture_t>(fOutput, fF);
356template<
typename Architecture_t>
358 const Matrix_t & activations_backward,
363 Architecture_t::Backward(gradients_backward,
367 fActivationGradients,
369 activations_backward);
370 addRegularizationGradients<Architecture_t>(fWeightGradients,
376template<
typename Architecture_t>
379 std::cout <<
"Width = " << fWeights.GetNrows();
380 std::cout <<
", Activation Function = ";
381 std::cout << static_cast<int>(fF) << std::endl;
include TDocParser_001 C image html pict1_TDocParser_001 png width
void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward, ERegularization r, Scalar_t weightDecay)
Compute weight, bias and activation gradients.
const Matrix_t & GetBiasGradients() const
void SetDropoutProbability(Scalar_t p)
const Matrix_t & GetActivationGradients() const
EActivationFunction fF
Activation function of the layer.
Matrix_t fActivationGradients
Gradients w.r.t. the activations of this layer.
TLayer(size_t BatchSize, size_t InputWidth, size_t Width, EActivationFunction f, Scalar_t dropoutProbability)
size_t fInputWidth
Number of neurons of the previous layer.
Matrix_t fBiasGradients
Gradients w.r.t. the bias values of this layer.
const Matrix_t & GetBiases() const
const Matrix_t & GetOutput() const
Matrix_t fOutput
Activations of this layer.
EActivationFunction GetActivationFunction() const
const Matrix_t & GetWeightGradients() const
size_t GetBatchSize() const
size_t GetInputWidth() const
Matrix_t & GetBiasGradients()
Scalar_t fDropoutProbability
Probability that an input is active.
Matrix_t fBiases
The bias values of this layer.
Matrix_t & GetActivationGradients()
size_t fWidth
Number of neurons of this layer.
typename Architecture_t::Matrix_t Matrix_t
typename Architecture_t::Scalar_t Scalar_t
size_t GetDropoutProbability() const
Matrix_t fWeightGradients
Gradients w.r.t. the weights of this layer.
size_t fBatchSize
Batch size used for training and evaluation.
void Initialize(EInitialization m)
Initialize fWeights according to the given initialization method.
Matrix_t fWeights
The weights of this layer.
Matrix_t fDerivatives
First derivatives of the activations of this layer.
Matrix_t & GetWeightGradients()
const Matrix_t & GetWeights() const
void Forward(Matrix_t &input, bool applyDropout=false)
Compute activation of the layer for the given input.
Layer class with shared weight and bias layers.
Matrix_t & fBiases
Reference to the bias vectors of this layer.
Matrix_t fOutput
Activations of this layer.
TSharedLayer(size_t fBatchSize, TLayer< Architecture_t > &layer)
Matrix_t & fWeights
Reference to the weight matrix of this layer.
Matrix_t & GetActivationGradients()
const Matrix_t & GetBiasGradients() const
Matrix_t fBiasGradients
Gradients w.r.t. the bias values of this layer.
Matrix_t & GetWeightGradients()
Matrix_t fDerivatives
First derivatives of the activations of this layer.
const Matrix_t & GetBiases() const
typename Architecture_t::Matrix_t Matrix_t
size_t fInputWidth
Number of neurons of the previous layer.
size_t fWidth
Number of neurons of this layer.
const Matrix_t & GetWeightGradients() const
size_t fBatchSize
Batch size used for training and evaluation.
Matrix_t fWeightGradients
Gradients w.r.t. the weights of this layer.
EActivationFunction GetActivationFunction() const
Matrix_t fActivationGradients
Gradients w.r.t. the activations of this layer.
size_t GetDropoutProbability() const
size_t GetInputWidth() const
size_t GetBatchSize() const
void Forward(Matrix_t &input, bool applyDropout=false)
Compute activation of the layer for the given input.
typename Architecture_t::Scalar_t Scalar_t
EActivationFunction fF
Activation function of the layer.
Scalar_t fDropoutProbability
Probability that an input is active.
Matrix_t & GetWeights() const
const Matrix_t & GetOutput() const
const Matrix_t & GetActivationGradients() const
Matrix_t & GetBiasGradients()
void SetDropoutProbability(Scalar_t p)
void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward, ERegularization r, Scalar_t weightDecay)
Compute weight, bias and activation gradients.
void Copy(void *source, void *dest)
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
create variable transformations