#ifndef TMVA_DNN_ARCHITECTURES_CUDA
#define TMVA_DNN_ARCHITECTURES_CUDA
template <typename AReal = Float_t>
class TCuda
{
public:
   static void CreateWeightTensors(std::vector<Matrix_t> &newWeights,
                                   const std::vector<Matrix_t> &weights)
   {
      if (!newWeights.empty()) newWeights.clear();
      size_t n = weights.size();
      for (size_t i = 0; i < n; ++i)
         newWeights.emplace_back(weights[i].GetNrows(), weights[i].GetNcols());
   }
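   // A minimal usage sketch (hedged; `layerWeights` is a hypothetical,
   // already-filled std::vector<Matrix_t>):
   //
   //    std::vector<Matrix_t> gradients;
   //    TCuda<AReal>::CreateWeightTensors(gradients, layerWeights);
   //    // each gradients[i] now has the shape of layerWeights[i] (values uninitialized)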
   static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
   {
      Error("InitializeBNormDescriptors",
            "Batch normalization on GPU is supported only with Cudnn");
   }
   static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
                        Matrix_t &biasGradients, const Tensor_t &df,
                        const Tensor_t &activationGradients, const Matrix_t &weights,
                        const Tensor_t &activationBackward);
   template <typename AMatrix_t>
   static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A);

   template <typename ATensor_t>
   static void CopyDiffArch(Tensor_t &A, const ATensor_t &B);

   template <typename AMatrix_t>
   static void CopyDiffArch(std::vector<Matrix_t> &A, const std::vector<AMatrix_t> &B);
   static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct,
                                         const ActivationDescriptor_t activationDescr,
                                         const double coef = 0.0, const AFloat alpha = 1,
                                         const AFloat beta = 0);
   static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth,
                      size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
                      size_t zeroPaddingHeight, size_t zeroPaddingWidth);

   static void Im2colIndices(std::vector<int> &V, const Matrix_t &B, size_t nLocalViews,
                             size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth,
                             size_t strideRows, size_t strideCols, size_t zeroPaddingHeight,
                             size_t zeroPaddingWidth);
   static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth,
                             size_t filterHeight, size_t filterWidth, size_t numFilters);
   static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
                                 Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients,
                                 const Matrix_t &weights, const Tensor_t &activationBackward,
                                 const Tensor_t &outputTensor, EActivationFunction activFunc,
                                 const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize,
                                 size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
                                 size_t width, size_t filterDepth, size_t filterHeight,
                                 size_t filterWidth, size_t nLocalViews);
   static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward,
                                                const Tensor_t &df, const Matrix_t &weights,
                                                size_t batchSize, size_t inputHeight,
                                                size_t inputWidth, size_t depth, size_t height,
                                                size_t width, size_t filterDepth,
                                                size_t filterHeight, size_t filterWidth);
   static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df,
                                            const Tensor_t &activations_backward,
                                            size_t batchSize, size_t inputHeight, size_t inputWidth,
                                            size_t depth, size_t height, size_t width,
                                            size_t filterDepth, size_t filterHeight,
                                            size_t filterWidth, size_t nLocalViews);
   static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df,
                                          size_t batchSize, size_t depth, size_t nLocalViews);
   static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C,
                          const PoolingDescriptors_t &, PoolingWorkspace_t &,
                          size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth,
                          size_t strideRows, size_t strideCols);
   static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward,
                                    const Tensor_t &activationGradients,
                                    const Tensor_t &indexMatrix,
                                    const Tensor_t &, const Tensor_t &,
                                    const PoolingDescriptors_t &, PoolingWorkspace_t &,
                                    size_t imgHeight, size_t imgWidth, size_t fltHeight,
                                    size_t fltWidth, size_t strideRows, size_t strideCols,
                                    size_t nLocalViews);
};
template <typename AFloat>
template <typename AMatrix_t>
void TCuda<AFloat>::CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
{
   // ... (body not included in this excerpt)
}
template <typename AFloat>
template <typename AMatrix_t>
void TCuda<AFloat>::CopyDiffArch(std::vector<Matrix_t> &B, const std::vector<AMatrix_t> &A)
{
   for (size_t i = 0; i < B.size(); ++i) {
      CopyDiffArch(B[i], A[i]);
   }
}
template <typename AFloat>
void TCuda<AFloat>::PrintTensor(const typename TCuda<AFloat>::Tensor_t &A,
                                const std::string name, bool)
{
   std::cout << name << " size = " << A.GetSize() << " shape = { ";
   auto shape = A.GetShape();
   for (size_t k = 0; k < shape.size() - 1; ++k)
      std::cout << shape[k] << " , ";
   std::cout << shape.back() << " } ";
   std::cout << " strides = { ";
   auto strides = A.GetStrides();
   for (size_t k = 0; k < strides.size() - 1; ++k)
      std::cout << strides[k] << " , ";
   std::cout << strides.back() << " }\n ";

   if (A.GetShape().size() == 2) {
      for (size_t i = 0; i < A.GetShape()[0]; ++i) {
         std::cout << "{ ";
         for (size_t j = 0; j < A.GetShape()[1]; ++j) {
            std::cout << A(i, j) << " ";
         }
         std::cout << " } " << std::endl;
      }
   } else if (A.GetShape().size() == 3) {
      for (size_t i = 0; i < A.GetFirstSize(); ++i) {
         std::cout << "{ ";
         for (size_t j = 0; j < A.GetHSize(); ++j) {
            std::cout << "{ ";
            for (size_t k = 0; k < A.GetWSize(); ++k) {
               std::cout << A(i, j, k) << " ";
            }
            std::cout << " } " << std::endl;
         }
         std::cout << " } " << std::endl;
      }
   } else {
      for (size_t l = 0; l < A.GetSize(); ++l) {
         std::cout << A.GetData()[l] << " ";
      }
   }
}
void Error(const char *location, const char *msgfmt,...)
Generic Max Pooling Layer class.
Layer implementing Batch Normalization.
TCudaMatrix< AFloat > GetMatrix() const
The TCuda architecture class.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B into a matrix and stores it in the tensor A.
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void SymmetricRelu(Tensor_t &B)
static void FreeConvWorkspace(TWorkspace *&, ConvLayer_t *)
Only used for certain cudnn on-device memory.
static void InitializeUniform(Matrix_t &A)
static void ReciprocalElementWise(Matrix_t &A)
Take the reciprocal of each element of the matrix A and write the result into A.
static void Softmax(Matrix_t &YHat, const Matrix_t &)
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
static void InitializeIdentity(Matrix_t &A)
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void InitializeGlorotUniform(Matrix_t &A)
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
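A hedged sketch of these element-wise helpers (Arch as above; the matrix contents are assumed to be filled elsewhere):

    Arch::Matrix_t A(2, 2);     // assume A has been initialized elsewhere
    Arch::ConstAdd(A, 1.0);     // A(i,j) += 1
    Arch::ConstMult(A, 0.5);    // A(i,j) *= 0.5 (ConstMult is documented below)
    Arch::SquareElementWise(A); // A(i,j) = A(i,j)^2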
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents a weights matrix, and stores the result in the matrix A.
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given tensor A and scale the result by reciprocal ...
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
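The standard inference-time rule, assumed here, is y = gamma * (x - runningMean) / sqrt(runningVar + epsilon) + beta, applied element-wise along the chosen axis.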
static void Sigmoid(Matrix_t &YHat, const Matrix_t &)
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
Initialize CNN data/operator descriptors.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
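The usual convolution arithmetic behind such a helper is outDim = (imgDim - fltDim + 2 * padding) / stride + 1 (assumed; the standard formula). For example:

    // A 32-pixel axis, 5-pixel filter, padding 2, stride 1:
    // (32 - 5 + 2*2) / 1 + 1 = 32, so the output axis keeps its size.
    size_t out = Arch::calculateDimension(32, 5, 2, 1); // 32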
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void SumRows(Matrix_t &B, const Matrix_t &A)
Extra functions defined only for the CPU architecture.
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void Sigmoid(Tensor_t &B)
static void DropoutBackward(Tensor_t &, TDescriptors *, TWorkspace *)
static void SoftSignDerivative(Tensor_t &B, const Tensor_t &A)
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
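Taken together with AdamUpdate and AdamUpdateFirstMom above, one optimizer step looks roughly like the following sketch (variable names are hypothetical and the update rules are assumed to follow standard Adam):

    Arch::AdamUpdateFirstMom(M, G, beta1);  // M = beta1*M + (1 - beta1)*G
    Arch::AdamUpdateSecondMom(V, G, beta2); // V = beta2*V + (1 - beta2)*G.*G
    Arch::AdamUpdate(W, M, V, alphaT, eps); // W -= alphaT * M / (sqrt(V) + eps)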
static void SymmetricReluDerivative(Tensor_t &B, const Tensor_t &A)
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static void PrintTensor(const Tensor_t &A, const std::string name="Cuda-tensor", bool=false)
static void Im2colIndices(std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Copy(Tensor_t &A, const Tensor_t &B)
static void InitializeConvDescriptors(TDescriptors *&, ConvLayer_t *)
static void Tanh(Tensor_t &B)
static void SigmoidDerivative(Tensor_t &B, const Tensor_t &A)
static TRandom * fgRandomGen
static void InitializePoolDropoutWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *)
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void ScaleAdd(Tensor_t &A, const Tensor_t &B, Scalar_t beta=1.0)
Above functions extended to vectors.
static TMVA::Experimental::MemoryLayout GetTensorLayout()
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The inputs from each batch are normalized during training to have zero mean and unit variance and they...
static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
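A hedged usage sketch pairing the loss with its gradient entry above (Y, output, weights and dY are assumed pre-sized Matrix_t objects):

    auto loss = Arch::CrossEntropy(Y, output, weights);  // scalar loss value
    Arch::CrossEntropyGradients(dY, Y, output, weights); // fills dY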
static void Gauss(Tensor_t &B)
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const AFloat alpha=1, const AFloat beta=0)
static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Matrix_t & RecurrentLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient)
Backward pass for Recurrent Networks.
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static void InitializePoolDescriptors(TDescriptors *&, PoolingLayer_t *)
static void Hadamard(Matrix_t &A, const Matrix_t &B)
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
TCudaTensor< AFloat > Tensor_t
static void InitializeGlorotNormal(Matrix_t &A)
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
static void ReluDerivative(Tensor_t &B, const Tensor_t &A)
static Scalar_t L2Regularization(const Matrix_t &W)
static void FreePoolDropoutWorkspace(TWorkspace *&, PoolingLayer_t *)
static void IdentityDerivative(Tensor_t &B, const Tensor_t &A)
static TRandom & GetRandomGenerator()
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void CreateWeightTensors(std::vector< Matrix_t > &newWeights, const std::vector< Matrix_t > &weights)
static void SqrtElementWise(Matrix_t &A)
Take the square root of each element of the matrix A and write the result into A.
static void InitializeGauss(Matrix_t &A)
static void InitializeActivationDescriptor(ActivationDescriptor_t &, EActivationFunction, double=0.0)
static void GaussDerivative(Tensor_t &B, const Tensor_t &A)
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static void InitializeConvWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *)
static void ReleaseBNormDescriptors(TDescriptors *&)
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply all the elements of matrix A by the constant beta and write the result into A.
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
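For example (shapes only): with batch size B = 4, sequence length T = 10 and feature dimension D = 3, in holds 10 matrices of size 4 x 3 and out holds 4 matrices of size 10 x 3.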
static void AddRowWise(Tensor_t &output, const Matrix_t &biases)
static void SoftSign(Tensor_t &B)
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights)
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
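Together with AddRowWise above, this is the core of a dense-layer forward pass; a hedged sketch (all matrices assumed pre-sized):

    Arch::MultiplyTranspose(output, input, weights); // output = input * weights^T
    Arch::AddRowWise(output, biases);                // broadcast biases onto each row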
static void ReleaseConvDescriptors(TDescriptors *&)
Release CNN data/operator descriptors.
static void SetRandomSeed(size_t seed)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void TanhDerivative(Tensor_t &B, const Tensor_t &A)
static void ReleaseDescriptor(ActivationDescriptor_t &)
static void CopyDiffArch(std::vector< Matrix_t > &A, const std::vector< AMatrix_t > &B)
static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const AFloat alpha=1, const AFloat beta=0)
Computes the gradient of the activation function.
static void Relu(Tensor_t &B)
static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
static Scalar_t L1Regularization(const Matrix_t &W)
static void InitializeZero(Matrix_t &A)
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix A with different dimensions.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B into local-view format, suitable for convolution, and store it in matrix A.
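A conceptual sketch of the im2col-based convolution this enables (variable names and sizes are hypothetical, and the orientation of the result is illustrative; the real ConvLayerForward also handles batching, activation functions and descriptors):

    // One image: unroll local receptive fields into rows, then convolve as a GEMM.
    Arch::Matrix_t localViews(nLocalViews, fltHeight * fltWidth * depth);
    Arch::Im2col(localViews, image, imgHeight, imgWidth, fltHeight, fltWidth,
                 strideRows, strideCols, padHeight, padWidth);
    Arch::MultiplyTranspose(convOutput, localViews, weights); // localViews * weights^T
    Arch::AddConvBiases(convOutput, biases);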
static void DropoutForward(Matrix_t &A, Scalar_t p)
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B, such that each matrix is stretched into one row, resulting in the matrix A.
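Flatten and Deflatten (documented above) are inverses of each other; a hedged round-trip sketch:

    Arch::Flatten(flat, deep);   // each matrix in deep is stretched into one row of flat
    Arch::Deflatten(deep, flat); // rows of flat are reshaped back into matrices of deep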
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void CopyDiffArch(Tensor_t &A, const ATensor_t &B)
static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void ReleasePoolDescriptors(TDescriptors *&)
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
This is the base class for the ROOT Random number generators.
EActivationFunction
Enum that represents layer activation functions.
MemoryLayout
Memory layout type (copy from RTensor.hxx)