#ifndef TMVA_DNN_ARCHITECTURES_CUDA
#define TMVA_DNN_ARCHITECTURES_CUDA
// The TCuda architecture class template (excerpted members follow).
template <typename AReal = Float_t>
class TCuda;
static void CreateWeightTensors(std::vector<Matrix_t> &newWeights,
                                const std::vector<Matrix_t> &weights)
{
   if (!newWeights.empty()) newWeights.clear();
   size_t n = weights.size();
   for (size_t i = 0; i < n; ++i)
      newWeights.emplace_back(weights[i].GetNrows(), weights[i].GetNcols());
}
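A minimal usage sketch (assuming a CUDA-enabled ROOT build; CloneWeightShapes is a hypothetical helper name, everything else comes from this header):

#include <vector>
#include "TMVA/DNN/Architectures/Cuda.h"

using Arch_t   = TMVA::DNN::TCuda<float>;
using Matrix_t = Arch_t::Matrix_t;   // TCudaMatrix<float>

// Allocate one device matrix per source matrix, matching each source's
// dimensions; the new matrices' contents are left uninitialized.
void CloneWeightShapes(std::vector<Matrix_t> &newWeights,
                       const std::vector<Matrix_t> &weights)
{
   Arch_t::CreateWeightTensors(newWeights, weights);
}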
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
{
   Error("InitializeBNormDescriptors",
         "Batch normalization on GPU is supported only with Cudnn");
}
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
                     Matrix_t &biasGradients, const Tensor_t &df,
                     const Tensor_t &activationGradients, const Matrix_t &weights,
                     const Tensor_t &activationBackward);
template <typename AMatrix_t>
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A);

template <typename ATensor_t>
static void CopyDiffArch(Tensor_t &A, const ATensor_t &B);

template <typename AMatrix_t>
static void CopyDiffArch(std::vector<Matrix_t> &A, const std::vector<AMatrix_t> &B);
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct,
                                      const ActivationDescriptor_t activationDescr,
                                      const double coef = 0.0, const AFloat alpha = 1,
                                      const AFloat beta = 0);
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth,
                   size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
                   size_t zeroPaddingHeight, size_t zeroPaddingWidth);

static void Im2colIndices(std::vector<int> &V, const Matrix_t &B, size_t nLocalViews,
                          size_t imgHeight, size_t imgWidth, size_t fltHeight,
                          size_t fltWidth, size_t strideRows, size_t strideCols,
                          size_t zeroPaddingHeight, size_t zeroPaddingWidth);

static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth,
                          size_t filterHeight, size_t filterWidth, size_t numFilters);
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
                              Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients,
                              const Matrix_t &weights, const Tensor_t &activationBackward,
                              const Tensor_t &outputTensor, EActivationFunction activFunc,
                              const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize,
                              size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
                              size_t width, size_t filterDepth, size_t filterHeight,
                              size_t filterWidth, size_t nLocalViews);

static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward,
                                             const Tensor_t &df, const Matrix_t &weights,
                                             size_t batchSize, size_t inputHeight,
                                             size_t inputWidth, size_t depth, size_t height,
                                             size_t width, size_t filterDepth,
                                             size_t filterHeight, size_t filterWidth);

static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df,
                                         const Tensor_t &activations_backward, size_t batchSize,
                                         size_t inputHeight, size_t inputWidth, size_t depth,
                                         size_t height, size_t width, size_t filterDepth,
                                         size_t filterHeight, size_t filterWidth,
                                         size_t nLocalViews);

static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df,
                                       size_t batchSize, size_t depth, size_t nLocalViews);
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C,
                       const PoolingDescriptors_t &, PoolingWorkspace_t &,
                       size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth,
                       size_t strideRows, size_t strideCols);

static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward,
                                 const Tensor_t &activationGradients, const Tensor_t &indexMatrix,
                                 const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &,
                                 PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth,
                                 size_t fltHeight, size_t fltWidth, size_t strideRows,
                                 size_t strideCols, size_t nLocalViews);
// Body of LSTMLayerBackward: no native CUDA implementation exists.
Fatal("TCuda::LSTMLayerBackward",
      "Recurrent layers are not supported in the native Cuda architecture!!!");
return state_gradients_backward;

// Body of GRULayerBackward: likewise unimplemented in native CUDA.
Fatal("TCuda::GRULayerBackward",
      "Recurrent layers are not supported in the native Cuda architecture!!!");
return state_gradients_backward;
template <typename AFloat>
template <typename AMatrix_t>
void TCuda<AFloat>::CopyDiffArch(Matrix_t &B, const AMatrix_t &A) { /* body elided */ }

template <typename AFloat>
template <typename AMatrix_t>
void TCuda<AFloat>::CopyDiffArch(std::vector<Matrix_t> &B, const std::vector<AMatrix_t> &A)
{
   for (size_t i = 0; i < B.size(); ++i) {
      CopyDiffArch(B[i], A[i]);
   }
}
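A hedged usage sketch of the vector overload, copying weights from the CPU architecture to the GPU (assumes a ROOT build with both TCpu and TCuda available, and destination matrices already sized, e.g. via CreateWeightTensors; UploadWeights is a hypothetical helper name):

#include <vector>
#include "TMVA/DNN/Architectures/Cpu.h"
#include "TMVA/DNN/Architectures/Cuda.h"

using CudaArch_t  = TMVA::DNN::TCuda<float>;
using CpuMatrix_t = TMVA::DNN::TCpu<float>::Matrix_t;

// Element-wise copy across architectures; gpuWeights[i] and cpuWeights[i]
// must have matching shapes.
void UploadWeights(std::vector<CudaArch_t::Matrix_t> &gpuWeights,
                   const std::vector<CpuMatrix_t> &cpuWeights)
{
   CudaArch_t::CopyDiffArch(gpuWeights, cpuWeights);
}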
template <typename AFloat>
void TCuda<AFloat>::PrintTensor(const Tensor_t &A, const std::string name, bool /*truncate*/)
{
   std::cout << name << " size = " << A.GetSize() << " shape = { ";
   auto shape = A.GetShape();
   for (size_t k = 0; k < shape.size() - 1; ++k)
      std::cout << shape[k] << " , ";
   std::cout << shape.back() << " } ";
   std::cout << " strides = { ";
   auto strides = A.GetStrides();
   for (size_t k = 0; k < strides.size() - 1; ++k)
      std::cout << strides[k] << " , ";
   std::cout << strides.back() << " }\n ";
   if (A.GetShape().size() == 2) {
      for (size_t i = 0; i < A.GetShape()[0]; ++i) {
         std::cout << "{ ";
         for (size_t j = 0; j < A.GetShape()[1]; ++j) {
            std::cout << A(i, j) << " ";
         }
         std::cout << " } " << std::endl;
      }
   } else if (A.GetShape().size() == 3) {
      for (size_t i = 0; i < A.GetFirstSize(); ++i) {
         std::cout << "{ ";
         for (size_t j = 0; j < A.GetHSize(); ++j) {
            std::cout << "{ ";
            for (size_t k = 0; k < A.GetWSize(); ++k) {
               std::cout << A(i, j, k) << " ";
            }
            std::cout << " } " << std::endl;
         }
         std::cout << " } " << std::endl;
      }
   }
}
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Generic Max Pooling Layer class.
Layer implementing Batch Normalization.
const Shape_t & GetShape() const
const AFloat * GetData() const
const Shape_t & GetStrides() const
size_t GetFirstSize() const
The TCuda architecture class.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void RNNBackward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, Tensor_t &, const RNNDescriptors_t &, RNNWorkspace_t &)
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
static void InitializeGRUWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
TCudaMatrix< AFloat > Matrix_t
static Matrix_t & LSTMLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &)
static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer.
static void SymmetricRelu(Tensor_t &B)
static void InitializeUniform(Matrix_t &A)
static void FastTanh(Tensor_t &B)
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
static void Softmax(Matrix_t &YHat, const Matrix_t &)
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
static void InitializeIdentity(Matrix_t &A)
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void InitializeGlorotUniform(Matrix_t &A)
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are stored in matrix B.
static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void InitializeRNNDescriptors(TDescriptors *&, GenLayer_t *)
static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given tensor A and scale the result by the reciprocal of p.
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void InitializeGRUDescriptors(TDescriptors *&, GenLayer_t *)
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed running means and variances.
static void InitializeGRUTensors(GenLayer_t *)
static void FreeConvWorkspace(TWorkspace *&)
Only used for certain cudnn on-device memory.
static void Sigmoid(Matrix_t &YHat, const Matrix_t &)
static void InitializeZero(Tensor_t &A)
static void InitializeRNNWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
Initialize CNN data/operator descriptors.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters.
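The helper implements the standard convolution/pooling output-size formula; a hypothetical standalone re-implementation for illustration (OutputDimension is not the ROOT member, and any input validation the member performs is omitted here):

#include <cstddef>

// outDim = (imgDim - fltDim + 2 * padding) / stride + 1
std::size_t OutputDimension(std::size_t imgDim, std::size_t fltDim,
                            std::size_t padding, std::size_t stride)
{
   return (imgDim - fltDim + 2 * padding) / stride + 1;
}

// Example: a 28 x 28 image with a 5 x 5 filter, stride 1, no padding -> 24 x 24 output.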
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void SumRows(Matrix_t &B, const Matrix_t &A)
Extra functions defined only for the CPU architecture.
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void Sigmoid(Tensor_t &B)
static void DropoutBackward(Tensor_t &, TDescriptors *, TWorkspace *)
static void InitializeLSTMTensors(GenLayer_t *)
static void SoftSignDerivative(Tensor_t &B, const Tensor_t &A)
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void SymmetricReluDerivative(Tensor_t &B, const Tensor_t &A)
static void FreeRNNWorkspace(TWorkspace *&)
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
static void InitializeLSTMWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
static void PrintTensor(const Tensor_t &A, const std::string name="Cuda-tensor", bool=false)
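A short usage sketch for PrintTensor (again assuming a CUDA-enabled ROOT build; DumpExampleTensor is a hypothetical name):

#include "TMVA/DNN/Architectures/Cuda.h"

void DumpExampleTensor()
{
   using Arch_t = TMVA::DNN::TCuda<float>;
   // 2 x 3 x 4 device tensor via the CreateTensor(b, t, w) overload.
   Arch_t::Tensor_t t = Arch_t::CreateTensor(2, 3, 4);
   Arch_t::InitializeZero(t);                 // zero-fill so the dump is deterministic
   Arch_t::PrintTensor(t, "example-tensor");  // prints size, shape, strides and elements
}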
static void Im2colIndices(std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Copy(Tensor_t &A, const Tensor_t &B)
static void InitializeConvDescriptors(TDescriptors *&, ConvLayer_t *)
static void Tanh(Tensor_t &B)
static void SigmoidDerivative(Tensor_t &B, const Tensor_t &A)
static TRandom * fgRandomGen
static void InitializePoolDropoutWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *)
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void ScaleAdd(Tensor_t &A, const Tensor_t &B, Scalar_t beta=1.0)
Above functions extended to vectors.
static TMVA::Experimental::MemoryLayout GetTensorLayout()
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The inputs from each batch are normalized during training to have zero mean and unit variance, and they are then scaled and shifted by the learnable parameters gamma and beta.
static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the last layer.
static void Gauss(Tensor_t &B)
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const AFloat alpha=1, const AFloat beta=0)
static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Matrix_t & RecurrentLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient)
Backward pass for Recurrent Networks.
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static void InitializePoolDescriptors(TDescriptors *&, PoolingLayer_t *)
static void RNNForward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Tensor_t &, Matrix_t &, Matrix_t &, const RNNDescriptors_t &, RNNWorkspace_t &, bool)
static void Hadamard(Matrix_t &A, const Matrix_t &B)
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
TCudaTensor< AFloat > Tensor_t
static void InitializeGlorotNormal(Matrix_t &A)
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements of B.
static void ReluDerivative(Tensor_t &B, const Tensor_t &A)
static Scalar_t L2Regularization(const Matrix_t &W)
static void IdentityDerivative(Tensor_t &B, const Tensor_t &A)
static TRandom & GetRandomGenerator()
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void CreateWeightTensors(std::vector< Matrix_t > &newWeights, const std::vector< Matrix_t > &weights)
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
static void InitializeGauss(Matrix_t &A)
static void InitializeActivationDescriptor(ActivationDescriptor_t &, EActivationFunction, double=0.0)
static void GaussDerivative(Tensor_t &B, const Tensor_t &A)
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static void InitializeConvWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *)
static void InitializeLSTMDescriptors(TDescriptors *&, GenLayer_t *)
static void ReleaseBNormDescriptors(TDescriptors *&)
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
static void AddRowWise(Tensor_t &output, const Matrix_t &biases)
static void SoftSign(Tensor_t &B)
static void InitializeRNNTensors(GenLayer_t *)
TCudaDeviceBuffer< AFloat > DeviceBuffer_t
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights)
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
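In a dense layer this computes output = input · weights^T; a plain-C++ reference of the same arithmetic (illustration only; MultiplyTransposeRef is hypothetical and is not the GPU path TCuda actually uses):

#include <cstddef>
#include <vector>

// output[i][j] = sum_k input[i][k] * weights[j][k]   (weights enter transposed)
std::vector<std::vector<float>>
MultiplyTransposeRef(const std::vector<std::vector<float>> &input,
                     const std::vector<std::vector<float>> &weights)
{
   std::vector<std::vector<float>> output(input.size(),
                                          std::vector<float>(weights.size(), 0.f));
   for (std::size_t i = 0; i < input.size(); ++i)
      for (std::size_t j = 0; j < weights.size(); ++j)
         for (std::size_t k = 0; k < weights[j].size(); ++k)
            output[i][j] += input[i][k] * weights[j][k];
   return output;
}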
static void ReleaseConvDescriptors(TDescriptors *&)
Release CNN data/operator descriptors.
static void FreePoolDropoutWorkspace(TWorkspace *&)
static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A)
static void SetRandomSeed(size_t seed)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void TanhDerivative(Tensor_t &B, const Tensor_t &A)
static void ReleaseDescriptor(ActivationDescriptor_t &)
static void CopyDiffArch(std::vector< Matrix_t > &A, const std::vector< AMatrix_t > &B)
static void ReleaseRNNDescriptors(TDescriptors *&)
static Matrix_t & GRULayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, bool)
Backward pass for GRU Network.
static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const AFloat alpha=1, const AFloat beta=0)
Computes the gradient of the activation function.
static void Relu(Tensor_t &B)
static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the last layer.
static Scalar_t L1Regularization(const Matrix_t &W)
static void InitializeZero(Matrix_t &A)
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B into local view format, suitable for convolution, and store it in matrix A.
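A minimal single-channel sketch of the im2col idea (hypothetical reference code, ignoring padding and the TMVA matrix layout): each output row is one receptive field flattened, so the convolution reduces to a single matrix multiplication with the filter matrix.

#include <cstddef>
#include <vector>

std::vector<std::vector<float>>
Im2colRef(const std::vector<std::vector<float>> &img,
          std::size_t fltH, std::size_t fltW, std::size_t stride)
{
   std::vector<std::vector<float>> views;   // one flattened local view per row
   if (img.empty()) return views;
   for (std::size_t r = 0; r + fltH <= img.size(); r += stride)
      for (std::size_t c = 0; c + fltW <= img[0].size(); c += stride) {
         std::vector<float> view;
         for (std::size_t i = 0; i < fltH; ++i)
            for (std::size_t j = 0; j < fltW; ++j)
               view.push_back(img[r + i][c + j]);
         views.push_back(std::move(view));
      }
   return views;
}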
static void DropoutForward(Matrix_t &A, Scalar_t p)
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B, scaled by beta, to the elements in matrix A.
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in the matrix A.
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void CopyDiffArch(Tensor_t &A, const ATensor_t &B)
static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void ReleasePoolDescriptors(TDescriptors *&)
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Generic General Layer class.
This is the base class for the ROOT Random number generators.
EActivationFunction
Enum that represents layer activation functions.
MemoryLayout
Memory layout type (copy from RTensor.hxx)