18#ifndef TMVA_DNN_ARCHITECTURES_REFERENCE
19#define TMVA_DNN_ARCHITECTURES_REFERENCE
51template<
typename AReal>
196 template<
typename AMatrix_t>
208 template<
typename AMatrix_t>
413 size_t zeroPaddingHeight,
414 size_t zeroPaddingWidth);
417 size_t ,
size_t ,
size_t ,
size_t ,
size_t ) {
418 Fatal(
"Im2ColIndices",
"This function is not implemented for ref architectures");
421 Fatal(
"Im2ColFast",
"This function is not implemented for ref architectures");
427 size_t filterWidth,
size_t numFilters);
443 Fatal(
"ConvLayerForward",
"This function is not implemented for ref architectures");
464 size_t ,
size_t ,
size_t ,
size_t ,
size_t,
465 size_t ,
size_t ,
size_t ,
size_t ,
size_t) {
466 Fatal(
"ConvLayerBackward",
"This function is not implemented for ref architectures");
470#ifdef HAVE_CNN_REFERENCE
473 static void CalculateConvActivationGradients(std::vector<
TMatrixT<AReal>> &activationGradientsBackward,
475 size_t batchSize,
size_t inputHeight,
size_t inputWidth,
size_t depth,
476 size_t height,
size_t width,
size_t filterDepth,
size_t filterHeight,
482 const std::vector<
TMatrixT<AReal>> &activationBackward,
size_t batchSize,
483 size_t inputHeight,
size_t inputWidth,
size_t depth,
size_t height,
484 size_t width,
size_t filterDepth,
size_t filterHeight,
size_t filterWidth,
490 size_t batchSize,
size_t depth,
size_t nLocalViews);
507 size_t imgWidth,
size_t fltHeight,
size_t fltWidth,
size_t strideRows,
size_t strideCols);
617 AReal learningRate,
size_t fBatchSize);
627 AReal corruptionLevel);
658template <
typename AReal>
659template <
typename AMatrix_t>
666template <
typename AReal>
667template <
typename AMatrix_t>
670 for (
size_t i = 0; i < A.size(); ++i) {
671 CopyDiffArch(A[i], B[i]);
include TDocParser_001 C image html pict1_TDocParser_001 png width
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Implementation of the CrossEntropy as separation criterion.
The reference architecture class.
static void AdamUpdate(TMatrixT< AReal > &A, const TMatrixT< AReal > &M, const TMatrixT< AReal > &V, AReal alpha, AReal eps)
Update functions for ADAM optimizer.
static void AdamUpdateSecondMom(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, AReal beta)
static void DropoutForward(Matrix_t &A, Scalar_t p)
static void SymmetricRelu(TMatrixT< AReal > &B)
static void InitializeIdentity(TMatrixT< AReal > &A)
static void MultiplyTranspose(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &input, const TMatrixT< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void InitializeGlorotNormal(TMatrixT< AReal > &A)
Truncated normal initialization (Glorot, also called Xavier normal). The values are sampled with a norm...
static void AdamUpdateFirstMom(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, AReal beta)
static void Flatten(TMatrixT< AReal > &A, const std::vector< TMatrixT< AReal > > &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.
static void Relu(TMatrixT< AReal > &B)
static void MaxPoolLayerBackward(TMatrixT< AReal > &activationGradientsBackward, const TMatrixT< AReal > &activationGradients, const TMatrixT< AReal > &indexMatrix, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCol, size_t nLocalViews)
Perform the complete backward propagation step in a Max Pooling Layer.
static void GaussDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftmaxAE(TMatrixT< AReal > &A)
static void AddL1RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
static void CrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static void Im2colFast(TMatrixT< AReal > &, const TMatrixT< AReal > &, const std::vector< int > &)
static void Downsample(TMatrixT< AReal > &A, TMatrixT< AReal > &B, const TMatrixT< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void EncodeInput(TMatrixT< AReal > &input, TMatrixT< AReal > &compressedInput, TMatrixT< AReal > &Weights)
static void TanhDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void ReconstructInput(TMatrixT< AReal > &compressedInput, TMatrixT< AReal > &reconstructedInput, TMatrixT< AReal > &fWeights)
static AReal L2Regularization(const TMatrixT< AReal > &W)
static void Im2colIndices(std::vector< int > &, const TMatrixT< AReal > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
static void IdentityDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static AReal SoftmaxCrossEntropy(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
static Matrix_t & GRULayerBackward(TMatrixT< Scalar_t > &state_gradients_backward, TMatrixT< Scalar_t > &reset_weight_gradients, TMatrixT< Scalar_t > &update_weight_gradients, TMatrixT< Scalar_t > &candidate_weight_gradients, TMatrixT< Scalar_t > &reset_state_weight_gradients, TMatrixT< Scalar_t > &update_state_weight_gradients, TMatrixT< Scalar_t > &candidate_state_weight_gradients, TMatrixT< Scalar_t > &reset_bias_gradients, TMatrixT< Scalar_t > &update_bias_gradients, TMatrixT< Scalar_t > &candidate_bias_gradients, TMatrixT< Scalar_t > &dr, TMatrixT< Scalar_t > &du, TMatrixT< Scalar_t > &dc, const TMatrixT< Scalar_t > &precStateActivations, const TMatrixT< Scalar_t > &fReset, const TMatrixT< Scalar_t > &fUpdate, const TMatrixT< Scalar_t > &fCandidate, const TMatrixT< Scalar_t > &weights_reset, const TMatrixT< Scalar_t > &weights_update, const TMatrixT< Scalar_t > &weights_candidate, const TMatrixT< Scalar_t > &weights_reset_state, const TMatrixT< Scalar_t > &weights_update_state, const TMatrixT< Scalar_t > &weights_candidate_state, const TMatrixT< Scalar_t > &input, TMatrixT< Scalar_t > &input_gradient)
Backward pass for GRU Network.
static void ConstAdd(TMatrixT< AReal > &A, AReal beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A)
static void SetRandomSeed(size_t seed)
static void SigmoidDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftSignDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void InitializeZero(TMatrixT< AReal > &A)
static void Softmax(TMatrixT< AReal > &YHat, const TMatrixT< AReal > &)
static void ReciprocalElementWise(TMatrixT< AReal > &A)
Take the reciprocal of each element of the matrix A and write the result into A.
static void Backward(TMatrixT< Scalar_t > &activationGradientsBackward, TMatrixT< Scalar_t > &weightGradients, TMatrixT< Scalar_t > &biasGradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &activationGradients, const TMatrixT< Scalar_t > &weights, const TMatrixT< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
static void SquareElementWise(TMatrixT< AReal > &A)
Square each element of the matrix A and write the result into A.
static void MeanSquaredErrorGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static TRandom * fgRandomGen
static void RotateWeights(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
static void Rearrange(std::vector< TMatrixT< AReal > > &out, const std::vector< TMatrixT< AReal > > &in)
Rearrange data according to time: fill the B x T x D tensor out from the T x B x D tensor in.
static Matrix_t & LSTMLayerBackward(TMatrixT< Scalar_t > &state_gradients_backward, TMatrixT< Scalar_t > &cell_gradients_backward, TMatrixT< Scalar_t > &input_weight_gradients, TMatrixT< Scalar_t > &forget_weight_gradients, TMatrixT< Scalar_t > &candidate_weight_gradients, TMatrixT< Scalar_t > &output_weight_gradients, TMatrixT< Scalar_t > &input_state_weight_gradients, TMatrixT< Scalar_t > &forget_state_weight_gradients, TMatrixT< Scalar_t > &candidate_state_weight_gradients, TMatrixT< Scalar_t > &output_state_weight_gradients, TMatrixT< Scalar_t > &input_bias_gradients, TMatrixT< Scalar_t > &forget_bias_gradients, TMatrixT< Scalar_t > &candidate_bias_gradients, TMatrixT< Scalar_t > &output_bias_gradients, TMatrixT< Scalar_t > &di, TMatrixT< Scalar_t > &df, TMatrixT< Scalar_t > &dc, TMatrixT< Scalar_t > &dout, const TMatrixT< Scalar_t > &precStateActivations, const TMatrixT< Scalar_t > &precCellActivations, const TMatrixT< Scalar_t > &fInput, const TMatrixT< Scalar_t > &fForget, const TMatrixT< Scalar_t > &fCandidate, const TMatrixT< Scalar_t > &fOutput, const TMatrixT< Scalar_t > &weights_input, const TMatrixT< Scalar_t > &weights_forget, const TMatrixT< Scalar_t > &weights_candidate, const TMatrixT< Scalar_t > &weights_output, const TMatrixT< Scalar_t > &weights_input_state, const TMatrixT< Scalar_t > &weights_forget_state, const TMatrixT< Scalar_t > &weights_candidate_state, const TMatrixT< Scalar_t > &weights_output_state, const TMatrixT< Scalar_t > &input, TMatrixT< Scalar_t > &input_gradient, TMatrixT< Scalar_t > &cell_gradient, TMatrixT< Scalar_t > &cell_tanh)
Backward pass for LSTM Network.
static void Deflatten(std::vector< TMatrixT< AReal > > &A, const TMatrixT< Scalar_t > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor A.
static void Im2col(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void Hadamard(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void UpdateParams(TMatrixT< AReal > &x, TMatrixT< AReal > &tildeX, TMatrixT< AReal > &y, TMatrixT< AReal > &z, TMatrixT< AReal > &fVBiases, TMatrixT< AReal > &fHBiases, TMatrixT< AReal > &fWeights, TMatrixT< AReal > &VBiasError, TMatrixT< AReal > &HBiasError, AReal learningRate, size_t fBatchSize)
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
static void Sigmoid(TMatrixT< AReal > &B)
static void CopyDiffArch(TMatrixT< Scalar_t > &A, const AMatrix_t &B)
static void SymmetricReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void Identity(TMatrixT< AReal > &B)
static void UpdateParamsLogReg(TMatrixT< AReal > &input, TMatrixT< AReal > &output, TMatrixT< AReal > &difference, TMatrixT< AReal > &p, TMatrixT< AReal > &fWeights, TMatrixT< AReal > &fBiases, AReal learningRate, size_t fBatchSize)
static void ConvLayerBackward(std::vector< TMatrixT< AReal > > &, TMatrixT< AReal > &, TMatrixT< AReal > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const std::vector< TMatrixT< AReal > > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Perform the complete backward propagation step in a Convolutional Layer.
static void SoftmaxCrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static AReal L1Regularization(const TMatrixT< AReal > &W)
static void AddConvBiases(TMatrixT< AReal > &output, const TMatrixT< AReal > &biases)
Add the biases in the Convolutional Layer.
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given matrix A and scale the result by reciprocal ...
static void ConvLayerForward(std::vector< TMatrixT< AReal > > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const TMatrixT< AReal > &, const DNN::CNN::TConvParams &, EActivationFunction, std::vector< TMatrixT< AReal > > &)
Forward propagation in the Convolutional layer.
static void AddL2RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
static void Copy(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B)
static void CorruptInput(TMatrixT< AReal > &input, TMatrixT< AReal > &corruptedInput, AReal corruptionLevel)
static void InitializeGauss(TMatrixT< AReal > &A)
static AReal MeanSquaredError(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static void SqrtElementWise(TMatrixT< AReal > &A)
Take the square root of each element of the matrix A and write the result into A.
static void SumColumns(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
static void ReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void ForwardLogReg(TMatrixT< AReal > &input, TMatrixT< AReal > &p, TMatrixT< AReal > &fWeights)
static void ConstMult(TMatrixT< AReal > &A, AReal beta)
Multiply all the elements of matrix A by the constant beta and write the result into A.
static void InitializeGlorotUniform(TMatrixT< AReal > &A)
Sample from a uniform distribution in range [-lim, +lim] where lim = sqrt(6/(N_in+N_out)).
static void FastTanh(Tensor_t &B)
static void Reshape(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
static void AddBiases(TMatrixT< AReal > &A, const TMatrixT< AReal > &biases)
static TRandom & GetRandomGenerator()
static void PrepareInternals(std::vector< TMatrixT< AReal > > &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
static Matrix_t & RecurrentLayerBackward(TMatrixT< Scalar_t > &state_gradients_backward, TMatrixT< Scalar_t > &input_weight_gradients, TMatrixT< Scalar_t > &state_weight_gradients, TMatrixT< Scalar_t > &bias_gradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &state, const TMatrixT< Scalar_t > &weights_input, const TMatrixT< Scalar_t > &weights_state, const TMatrixT< Scalar_t > &input, TMatrixT< Scalar_t > &input_gradient)
Backpropagation step for a Recurrent Neural Network.
static void InitializeUniform(TMatrixT< AReal > &A)
This is the base class for the ROOT Random number generators.
std::shared_ptr< std::function< double(double)> > Tanh
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
EActivationFunction
Enum that represents layer activation functions.
std::shared_ptr< std::function< double(double)> > Gauss
std::shared_ptr< std::function< double(double)> > Sigmoid
std::shared_ptr< std::function< double(double)> > SoftSign
create variable transformations
static void output(int code)