36   float alpha = 1.0, beta = 0.0;
 
   39   cudaStream_t s = 
output.GetComputeStream();
 
 
   60   double alpha = 1.0, beta = 0.0;
 
   63   cudaStream_t s = 
output.GetComputeStream();
 
 
   75template<
typename AFloat>
 
   81   cudaStream_t s = 
Weights.GetComputeStream();
 
   82   ::TMVA::DNN::Cuda::AddRowWise<<<gridDims, blockDims, 0, s>>>(
 
   84       theta.GetDataPointer(),
 
   90template<
typename AFloat>
 
  128template<
typename AFloat>
 
  132   size_t m = B.GetNrows();
 
  133   size_t n = B.GetNcols();
 
  139template<
typename AFloat>
 
  143   size_t n = B.GetSize();
 
  150template<
typename AFloat>
 
  155      Fatal(
"calculateDimension", 
"Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride)" 
 
  182template<
typename AFloat>
 
  194   size_t depth = B.GetNrows();
 
  198   cudaStream_t s = A.GetComputeStream();
 
  200   ::TMVA::DNN::Cuda::Im2Col<<<gridDims, blockDims, 0, s>>>(A.GetDataPointer(), B.GetDataPointer(), 
depth, 
imgHeight, 
imgWidth,
 
 
  206template<
typename AFloat>
 
  216   cudaStream_t s = B.GetComputeStream();
 
  218   ::TMVA::DNN::Cuda::RotateWeights<<<gridDims, blockDims, 0, s>>>(A.GetDataPointer(), B.GetDataPointer(), 
filterDepth,
 
 
  223template <
typename AFloat>
 
  235template <
typename AFloat>
 
  254   for(
size_t event = 0; 
event < 
input.GetFirstSize(); 
event++) {
 
 
  275template<
typename AFloat>
 
  323template<
typename AFloat>
 
  355   R__ASSERT( df.GetFirstSize() ==  batchSize);
 
  358   for(
size_t event = 0; 
event < batchSize; 
event++) {
 
 
  368template<
typename AFloat>
 
  386    const size_t filterSize = filterHeight * filterWidth;
 
  390    R__ASSERT( df.GetFirstSize() ==  batchSize);
 
  404    for(
size_t event = 0; 
event < batchSize; 
event++) {
 
 
  415template<
typename AFloat>
 
  424    for (
size_t event = 0; 
event < batchSize; 
event++) {
 
 
  431template<
typename AFloat>
 
  437    cudaStream_t s = 
output.GetComputeStream();
 
  438    ::TMVA::DNN::Cuda::AddBiases<<<gridDims, blockDims, 0, s>>>(
 
 
  467template<
typename AFloat>
 
  480   size_t depth = C.GetCSize();
 
  481   size_t bsize = C.GetFirstSize();
 
  485   cudaStream_t s = A.GetComputeStream();
 
  487   for(
size_t event = 0; 
event < 
bsize; 
event++) {
 
  492      ::TMVA::DNN::Cuda::Downsample<<<gridDims, blockDims, 0, s>>>(A.GetDataPointerAt(event), B.GetDataPointerAt(event),
 
 
  498template<
typename AFloat>
 
  523   for(
size_t event = 0; 
event < 
bsize; 
event++) {
 
  529                                                                     strideRows, strideCols);
 
 
  534template<
typename AFloat>
 
  539    cudaStream_t s = A.GetComputeStream();
 
  541    ::TMVA::DNN::Cuda::Reshape<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(),
 
  542                                                        A.GetNrows(), A.GetNcols(), B.GetNrows(), B.GetNcols());
 
 
  547template <
typename AReal>
 
  591template<
typename AFloat>
 
  596   size_t nDepth    = B.GetFirstSize();   
 
  597   size_t nRows = B.GetCSize();      
 
  598   size_t nCols = B.GetWSize();      
 
  599   if (B.GetNDim()==4) 
nCols *= B.GetHSize();
 
  604   cudaStream_t s = A.GetComputeStream();
 
  639   if (B.GetLayout() == GetTensorLayout() )
 
  640      ::TMVA::DNN::Cuda::Flatten<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(), 
nDepth, 
nRows, 
nCols);
 
  644      ::TMVA::DNN::Cuda::FlattenRM<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(), 
nDepth, 
nRows, 
nCols);
 
 
  669template<
typename AFloat>
 
  673   size_t nDepth = A.GetFirstSize();   
 
  674   size_t nRows = A.GetCSize();      
 
  675   size_t nCols = A.GetWSize();      
 
  676   if (A.GetNDim()==4) 
nCols *= A.GetHSize();
 
  682    cudaStream_t s = B.GetComputeStream();
 
  705    if (A.GetLayout() == GetTensorLayout() )
 
  706      ::TMVA::DNN::Cuda::Deflatten<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(), 
nDepth, 
nRows, 
nCols);
 
  709      ::TMVA::DNN::Cuda::DeflattenRM<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(), 
nDepth, 
nRows, 
nCols);
 
 
  728template <
typename AFloat>
 
  742template <
typename AFloat>
 
  753template <
typename AFloat>
 
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
 
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
 
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
 
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
 
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
 
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
 
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
 
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
 
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
 
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.
 
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
 
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
 
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
 
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
 
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
 
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
 
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
 
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
 
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
 
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
 
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
 
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
 
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
 
static void Copy(Matrix_t &B, const Matrix_t &A)
 
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
 
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
 
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
 
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
 
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
 
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
 
static dim3 BlockDims2D()
 
static dim3 GridDims2D(int nrows, int ncols)
 
EActivationFunction
Enum that represents layer activation functions.
 
create variable transformations
 
size_t strideRows
The number of row pixels to slide the filter each step.
 
size_t filterHeight
The height of the filter.
 
size_t inputHeight
The height of the previous layer or input.
 
size_t paddingWidth
The number of zero layers left and right of the input.
 
size_t filterWidth
The width of the filter.
 
size_t paddingHeight
The number of zero layers added top and bottom of the input.
 
size_t inputWidth
The width of the previous layer or input.
 
size_t strideCols
The number of column pixels to slide the filter each step.