36 float alpha = 1.0, beta = 0.0;
39 cudaStream_t s =
output.GetComputeStream();
40 cublasSetStream(
input.GetCublasHandle(), s);
41 cublasSgemm(
input.GetCublasHandle(),
42 CUBLAS_OP_N, CUBLAS_OP_T,
60 double alpha = 1.0, beta = 0.0;
63 cudaStream_t s =
output.GetComputeStream();
64 cublasSetStream(
input.GetCublasHandle(), s);
65 cublasDgemm(
input.GetCublasHandle(),
66 CUBLAS_OP_N, CUBLAS_OP_T,
75template<
typename AFloat>
82 ::TMVA::DNN::Cuda::AddRowWise<<<gridDims, blockDims, 0, s>>>(
90template<
typename AFloat>
109 if (activation_gradients_backward.
GetSize() > 0) {
111 Matrix_t activation_gradients_backward_m = activation_gradients_backward.
GetMatrix();
128template<
typename AFloat>
135 m *
n *
sizeof(AFloat), cudaMemcpyDeviceToDevice, 0);
139template<
typename AFloat>
146 n *
sizeof(AFloat), cudaMemcpyDeviceToDevice, 0);
150template<
typename AFloat>
153 size_t temp = imgDim - fltDim + 2 * padding;
154 if (temp % stride || temp + stride <= 0) {
155 Fatal(
"calculateDimension",
"Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride)"
156 " %zu , %zu , %zu , %zu", imgDim, fltDim, padding, stride);
158 return temp / stride + 1;
182template<
typename AFloat>
191 size_t zeroPaddingHeight,
192 size_t zeroPaddingWidth)
201 fltHeight, fltWidth, strideRows, strideCols,
202 zeroPaddingHeight, zeroPaddingWidth);
206template<
typename AFloat>
219 filterHeight, filterWidth, numFilters);
223template <
typename AFloat>
235template <
typename AFloat>
254 for(
size_t event = 0;
event <
input.GetFirstSize();
event++) {
261 MultiplyTranspose(output_m, weights, inputPrime_m);
262 AddConvBiases(output_m, biases);
269 Copy(inputActivationFunc,
output);
275template<
typename AFloat>
302 ActivationFunctionBackward(df, outputTensor, activationGradients, inputActivationFunc,
310 CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth, depth,
311 height,
width, filterDepth, filterHeight, filterWidth);
315 CalculateConvWeightGradients(weightGradients, df, activationBackward, batchSize, inputHeight, inputWidth, depth,
316 height,
width, filterDepth, filterHeight, filterWidth, nLocalViews);
319 CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);
323template<
typename AFloat>
338 if (activationGradientsBackward.
GetSize() == 0)
return;
341 RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.
GetNrows());
344 size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight -
height + filterHeight - 1) / 2));
345 size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth -
width + filterWidth - 1) / 2));
348 size_t tempNLocalViews = inputHeight * inputWidth;
349 size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
352 size_t tempStrideRows = 1;
353 size_t tempStrideCols = 1;
358 for(
size_t event = 0;
event < batchSize;
event++) {
359 Im2col(dfPrime, df.
At(
event).GetMatrix(),
height,
width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
360 tempZeroPaddingHeight, tempZeroPaddingWidth);
363 MultiplyTranspose(agb_m, rotWeights, dfPrime);
368template<
typename AFloat>
384 weightGradients.
Zero();
386 const size_t filterSize = filterHeight * filterWidth;
387 const size_t nLocalViewPixels = filterDepth * filterSize;
394 const size_t tempStrideRows = 1;
395 const size_t tempStrideCols = 1;
398 const size_t tempZeroPaddingHeight = (
height - inputHeight + filterHeight - 1) / 2;
399 const size_t tempZeroPaddingWidth = (
width - inputWidth + filterWidth - 1) / 2;
404 for(
size_t event = 0;
event < batchSize;
event++) {
405 Im2col(activationsPrime, activationsBackward.
At(
event).GetMatrix(), inputHeight, inputWidth, filterHeight, filterWidth,
406 tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
408 Multiply(resPrime, df.
At(
event).GetMatrix(), activationsPrime);
415template<
typename AFloat>
422 biasGradients.
Zero();
424 for (
size_t event = 0;
event < batchSize;
event++) {
431template<
typename AFloat>
437 cudaStream_t s =
output.GetComputeStream();
438 ::TMVA::DNN::Cuda::AddBiases<<<gridDims, blockDims, 0, s>>>(
467template<
typename AFloat>
480 size_t depth = C.GetCSize();
481 size_t bsize = C.GetFirstSize();
487 for(
size_t event = 0;
event < bsize;
event++) {
493 C.GetDataPointerAt(
event), depth, imgHeight, imgWidth,
494 fltHeight, fltWidth, strideRows, strideCols);
498template<
typename AFloat>
514 size_t depth = activationGradientsBackward.
GetCSize();
520 activationGradientsBackward.
GetWSize());
523 for(
size_t event = 0;
event < bsize;
event++) {
525 ::TMVA::DNN::Cuda::MaxPoolBackward<<<gridDims, blockDims, 0, s>>>(activationGradientsBackward.
GetDataPointerAt(
event),
528 depth, imgHeight, imgWidth, fltHeight, fltWidth,
529 strideRows, strideCols);
534template<
typename AFloat>
547template <
typename AReal>
591template<
typename AFloat>
669template<
typename AFloat>
728template <
typename AFloat>
742template <
typename AFloat>
753template <
typename AFloat>
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
cudaStream_t GetComputeStream() const
size_t GetNoElements() const
const AFloat * GetDataPointer() const
TCudaTensor< AFloat > At(size_t i) const
const AFloat * GetDataPointerAt(size_t i) const
const Shape_t & GetShape() const
cudaStream_t GetComputeStream() const
MemoryLayout GetLayout() const
TCudaMatrix< AFloat > GetMatrix() const
const AFloat * GetDataPointer() const
size_t GetFirstSize() const
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B, scaled by beta, to the elements in the matrix A.
static dim3 BlockDims2D()
static dim3 GridDims2D(int nrows, int ncols)
EActivationFunction
Enum that represents layer activation functions.
create variable transformations
size_t strideRows
The number of row pixels to slide the filter each step.
size_t filterHeight
The height of the filter.
size_t inputHeight
The height of the previous layer or input.
size_t paddingWidth
The number of zero layers left and right of the input.
size_t filterWidth
The width of the filter.
size_t paddingHeight
The number of zero layers added top and bottom of the input.
size_t inputWidth
The width of the previous layer or input.
size_t strideCols
The number of column pixels to slide the filter each step.