36 float alpha = 1.0, beta = 0.0;
42 CUBLAS_OP_N, CUBLAS_OP_T,
60 double alpha = 1.0, beta = 0.0;
66 CUBLAS_OP_N, CUBLAS_OP_T,
75template<
typename AFloat>
90template<
typename AFloat>
109 if (activation_gradients_backward.
GetSize() > 0) {
111 Matrix_t activation_gradients_backward_m = activation_gradients_backward.
GetMatrix();
128template<
typename AFloat>
135 m *
n *
sizeof(AFloat), cudaMemcpyDeviceToDevice, 0);
139template<
typename AFloat>
146 n *
sizeof(AFloat), cudaMemcpyDeviceToDevice, 0);
150template<
typename AFloat>
153 size_t temp = imgDim - fltDim + 2 * padding;
154 if (temp % stride || temp + stride <= 0) {
155 Fatal(
"calculateDimension",
"Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride)"
156 " %zu , %zu , %zu , %zu", imgDim, fltDim, padding, stride);
158 return temp / stride + 1;
182template<
typename AFloat>
191 size_t zeroPaddingHeight,
192 size_t zeroPaddingWidth)
201 fltHeight, fltWidth, strideRows, strideCols,
202 zeroPaddingHeight, zeroPaddingWidth);
206template<
typename AFloat>
219 filterHeight, filterWidth, numFilters);
223template <
typename AFloat>
235template <
typename AFloat>
254 for(
size_t event = 0;
event < input.
GetFirstSize();
event++) {
255 Matrix_t inputPrime_m = inputPrime.
At(event).GetMatrix();
256 Matrix_t output_m = output.
At(event).GetMatrix();
269 Copy(inputActivationFunc, output);
275template<
typename AFloat>
311 height, width, filterDepth, filterHeight, filterWidth);
316 height, width, filterDepth, filterHeight, filterWidth, nLocalViews);
323template<
typename AFloat>
338 if (activationGradientsBackward.
GetSize() == 0)
return;
344 size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
345 size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));
348 size_t tempNLocalViews = inputHeight * inputWidth;
349 size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
352 size_t tempStrideRows = 1;
353 size_t tempStrideCols = 1;
355 R__ASSERT( df.GetFirstSize() == batchSize);
358 for(
size_t event = 0;
event < batchSize;
event++) {
359 Im2col(dfPrime, df.At(event).GetMatrix(), height, width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
360 tempZeroPaddingHeight, tempZeroPaddingWidth);
368template<
typename AFloat>
384 weightGradients.
Zero();
386 const size_t filterSize = filterHeight * filterWidth;
387 const size_t nLocalViewPixels = filterDepth * filterSize;
390 R__ASSERT( df.GetFirstSize() == batchSize);
394 const size_t tempStrideRows = 1;
395 const size_t tempStrideCols = 1;
398 const size_t tempZeroPaddingHeight = (height - inputHeight + filterHeight - 1) / 2;
399 const size_t tempZeroPaddingWidth = (width - inputWidth + filterWidth - 1) / 2;
404 for(
size_t event = 0;
event < batchSize;
event++) {
405 Im2col(activationsPrime, activationsBackward.
At(event).GetMatrix(), inputHeight, inputWidth, filterHeight, filterWidth,
406 tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
408 Multiply(resPrime, df.At(event).GetMatrix(), activationsPrime);
415template<
typename AFloat>
422 biasGradients.
Zero();
424 for (
size_t event = 0;
event < batchSize;
event++) {
431template<
typename AFloat>
467template<
typename AFloat>
480 size_t depth = C.GetCSize();
481 size_t bsize = C.GetFirstSize();
487 for(
size_t event = 0;
event < bsize;
event++) {
493 C.GetDataPointerAt(event), depth, imgHeight, imgWidth,
494 fltHeight, fltWidth, strideRows, strideCols);
498template<
typename AFloat>
514 size_t depth = activationGradientsBackward.
GetCSize();
520 activationGradientsBackward.
GetWSize());
523 for(
size_t event = 0;
event < bsize;
event++) {
528 depth, imgHeight, imgWidth, fltHeight, fltWidth,
529 strideRows, strideCols);
534template<
typename AFloat>
547template <
typename AReal>
591template<
typename AFloat>
669template<
typename AFloat>
728template <
typename AFloat>
742template <
typename AFloat>
753template <
typename AFloat>
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
cudaStream_t GetComputeStream() const
size_t GetNoElements() const
const cublasHandle_t & GetCublasHandle() const
const AFloat * GetDataPointer() const
TCudaTensor< AFloat > At(size_t i) const
const AFloat * GetDataPointerAt(size_t i) const
const Shape_t & GetShape() const
cudaStream_t GetComputeStream() const
MemoryLayout GetLayout() const
TCudaMatrix< AFloat > GetMatrix() const
const AFloat * GetDataPointer() const
size_t GetFirstSize() const
CNN::TCNNDescriptors< ConvLayer_t > ConvDescriptors_t
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
TCudaMatrix< AFloat > Matrix_t
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
CNN::TCNNWorkspace< ConvLayer_t > ConvWorkspace_t
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const AFloat alpha=1, const AFloat beta=0)
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const AFloat alpha=1, const AFloat beta=0)
Computes the gradient of the activation function.
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static TMVA::Experimental::MemoryLayout GetTensorLayout()
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
CNN::TCNNDescriptors< PoolingLayer_t > PoolingDescriptors_t
CudaActivationDescriptor ActivationDescriptor_t
TCudaTensor< AFloat > Tensor_t
CNN::TCNNWorkspace< PoolingLayer_t > PoolingWorkspace_t
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
DummyCudaDataType TensorDescriptor_t
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill B x T x D out with T x B x D matrix in.
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
static dim3 BlockDims2D()
static dim3 GridDims2D(int nrows, int ncols)
__global__ void AddRowWise(AFloat *W, const AFloat *theta, int m, int n)
__global__ void Deflatten(AFloat *A, const AFloat *B, int size, int nRows, int nCols)
Deflatten a 2D-array into an array of 2D-arrays.
__global__ void Flatten(AFloat *A, const AFloat *B, int size, int nRows, int nCols)
Flatten an array of 2D-arrays into a single 2D-array.
__global__ void RotateWeights(AFloat *A, const AFloat *B, int filterDepth, int filterHeight, int filterWidth, int numFilters)
__global__ void Im2Col(AFloat *A, const AFloat *B, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols, int zeroPaddingHeight, int zeroPaddingWidth)
A kernel that re-arranges image regions of the input matrix B into column vectors in matrix A.
__global__ void DeflattenRM(AFloat *A, const AFloat *B, int size, int nRows, int nCols)
__global__ void Reshape(AFloat *A, const AFloat *B, int nRowsA, int nColsA, int nRowsB, int nColsB)
__global__ void FlattenRM(AFloat *A, const AFloat *B, int size, int nRows, int nCols)
__global__ void MaxPoolBackward(AFloat *activationGradientsBackward, const AFloat *activationGradients, const AFloat *indexMatrix, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols)
Back-propagate the gradients through a max-pooling layer.
__global__ void Downsample(AFloat *output, AFloat *indexMatrix, const AFloat *input, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols)
Downsampling kernel used as the forward propagation step of a Max-Pooling layer.
__global__ void AddBiases(AFloat *A, const AFloat *B, int nRows, int nCols)
EActivationFunction
Enum that represents layer activation functions.
create variable transformations
size_t strideRows
The number of row pixels to slide the filter each step.
size_t filterHeight
The height of the filter.
size_t inputHeight
The height of the previous layer or input.
size_t paddingWidth
The number of zero layers left and right of the input.
size_t filterWidth
The width of the filter.
size_t paddingHeight
The number of zero layers added top and bottom of the input.
size_t inputWidth
The width of the previous layer or input.
size_t strideCols
The number of column pixels to slide the filter each step.