ROOT Reference Guide
TMVA::DNN::Cuda Namespace Reference

Functions

template<typename AFloat >
__global__ void AbsoluteSum (AFloat *result, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void AdamUpdate (AFloat *A, const AFloat *M, const AFloat *V, int m, int n, AFloat alpha, AFloat eps)
 Optimizer kernel functions.
 
template<typename AFloat >
__global__ void AdamUpdateFirstMom (AFloat *A, const AFloat *B, int m, int n, AFloat beta)
 
template<typename AFloat >
__global__ void AdamUpdateSecondMom (AFloat *A, const AFloat *B, int m, int n, AFloat beta)
 
template<typename AFloat >
__global__ void AddBiases (AFloat *A, const AFloat *B, int nRows, int nCols)
 
template<typename AFloat >
__global__ void AddL1RegularizationGradients (AFloat *A, const AFloat *B, AFloat weightDecay, int m, int n)
 
template<typename AFloat >
__global__ void AddL2RegularizationGradients (AFloat *A, const AFloat *B, AFloat weightDecay, int m, int n)
 
template<typename AFloat >
__global__ void AddRowWise (AFloat *W, const AFloat *theta, int m, int n)
 
template<typename AFloat >
__global__ void AlmostEquals (bool *result, const AFloat *A, const AFloat *B, double epsilon, int m, int n)
 
template<typename AFloat >
__device__ AFloat AtomicAdd (AFloat *address, AFloat val)
 
template<>
__device__ double AtomicAdd (double *address, double val)
 
template<>
__device__ float AtomicAdd (float *address, float val)
 
__device__ int calculateDimension (int imgDim, int fltDim, int padding, int stride)
 Calculate the dimension of an output volume, given the sliding parameters and the input shape.
 
template<typename AFloat >
__global__ void ConstAdd (AFloat *A, AFloat beta, int m, int n)
 
template<typename AFloat >
__global__ void ConstMult (AFloat *A, AFloat beta, int m, int n)
 
template<typename AFloat >
__global__ void CrossEntropy (AFloat *result, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)
 
template<typename AFloat >
__global__ void CrossEntropyGradients (AFloat *dY, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)
 
template<typename AFloat >
__global__ void Deflatten (AFloat *A, const AFloat *B, int size, int nRows, int nCols)
 Deflatten a 2D-array into an array of 2D-arrays.
 
template<typename AFloat >
__global__ void DeflattenRM (AFloat *A, const AFloat *B, int size, int nRows, int nCols)
 
template<typename AFloat >
__global__ void Downsample (AFloat *output, AFloat *indexMatrix, const AFloat *input, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols)
 Downsampling kernel used as the forward propagation step of a Max-Pooling layer.
 
template<typename AFloat >
__global__ void Dropout (AFloat *A, int m, int n, AFloat dropoutProbability, curandState_t *state)
 
template<typename AFloat >
__global__ void Flatten (AFloat *A, const AFloat *B, int size, int nRows, int nCols)
 Flatten an array of 2D-arrays into a single 2D-array.
 
template<typename AFloat >
__global__ void FlattenRM (AFloat *A, const AFloat *B, int size, int nRows, int nCols)
 
template<typename AFloat >
__global__ void Gauss (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void GaussDerivative (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void Hadamard (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void IdentityDerivative (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void Im2Col (AFloat *A, const AFloat *B, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols, int zeroPaddingHeight, int zeroPaddingWidth)
 A kernel that re-arranges image regions of the input matrix B into column vectors in matrix A.
 
template<typename AFloat >
__device__ AFloat max (AFloat x, AFloat y)
 
template<typename AFloat >
__global__ void MaxPoolBackward (AFloat *activationGradientsBackward, const AFloat *activationGradients, const AFloat *indexMatrix, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols)
 Back-propagate the gradients through a max-pooling layer.
 
template<typename AFloat >
__global__ void MeanSquaredError (AFloat *result, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)
 
template<typename AFloat >
__global__ void MeanSquaredErrorGradients (AFloat *dY, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)
 
template<typename AFloat >
__global__ void ReciprocalElementWise (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void ReduceMatrix (AFloat *result, const AFloat *A, int m, int n)
 
template<typename AFloat >
__device__ void ReduceSum (AFloat *result, AFloat *sdata)
 
template<typename AFloat >
__device__ void ReduceSumVertical (AFloat *result, AFloat *sdata, int n)
 
template<typename AFloat >
__global__ void Relu (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void ReluDerivative (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void Reshape (AFloat *A, const AFloat *B, int nRowsA, int nColsA, int nRowsB, int nColsB)
 
template<typename AFloat >
__global__ void RotateWeights (AFloat *A, const AFloat *B, int filterDepth, int filterHeight, int filterWidth, int numFilters)
 
template<typename AFloat >
__global__ void Sigmoid (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void Sigmoid (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void SigmoidDerivative (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void Softmax (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void SoftmaxCrossEntropy (AFloat *result, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)
 
template<typename AFloat >
__global__ void SoftmaxCrossEntropyGradients (AFloat *dY, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)
 
template<typename AFloat >
__global__ void SoftSign (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void SoftSignDerivative (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void SqrtElementWise (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void SquaredSum (AFloat *result, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void SquareElementWise (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void SumColumns (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void SymmetricRelu (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void SymmetricReluDerivative (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void Tanh (AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void TanhDerivative (AFloat *B, const AFloat *A, int m, int n)
 
template<typename AFloat >
__global__ void UpdateWeights (AFloat *A, const AFloat **B, int batchSize, int nRows, int nCols)
 

Function Documentation

◆ AbsoluteSum()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::AbsoluteSum ( AFloat *  result,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 729 of file Kernels.cuh.

◆ AdamUpdate()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::AdamUpdate ( AFloat *  A,
const AFloat *  M,
const AFloat *  V,
int  m,
int  n,
AFloat  alpha,
AFloat  eps 
)

Optimizer kernel functions.

Definition at line 408 of file Kernels.cuh.
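For orientation, a minimal sketch of what an element-wise Adam update kernel of this shape typically computes, assuming the standard Adam rule A ← A − alpha · M / (sqrt(V) + eps) applied to a column-major m×n matrix; the kernel name and indexing below are illustrative assumptions, not the exact body in Kernels.cuh.

// Hypothetical element-wise Adam update: M and V are the (bias-corrected)
// first and second moment estimates, alpha the learning rate, eps a small
// constant for numerical stability.
template <typename AFloat>
__global__ void AdamUpdateSketch(AFloat *A, const AFloat *M, const AFloat *V,
                                 int m, int n, AFloat alpha, AFloat eps)
{
   int i = blockDim.y * blockIdx.y + threadIdx.y; // row index
   int j = blockDim.x * blockIdx.x + threadIdx.x; // column index
   if (i < m && j < n) {
      int index = j * m + i; // column-major storage, as used by TCudaMatrix
      A[index] -= alpha * M[index] / (sqrt(V[index]) + eps);
   }
}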

◆ AdamUpdateFirstMom()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::AdamUpdateFirstMom ( AFloat *  A,
const AFloat *  B,
int  m,
int  n,
AFloat  beta 
)

Definition at line 422 of file Kernels.cuh.

◆ AdamUpdateSecondMom()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::AdamUpdateSecondMom ( AFloat *  A,
const AFloat *  B,
int  m,
int  n,
AFloat  beta 
)

Definition at line 436 of file Kernels.cuh.

◆ AddBiases()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::AddBiases ( AFloat *  A,
const AFloat *  B,
int  nRows,
int  nCols 
)

Definition at line 1120 of file Kernels.cuh.

◆ AddL1RegularizationGradients()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::AddL1RegularizationGradients ( AFloat *  A,
const AFloat *  B,
AFloat  weightDecay,
int  m,
int  n 
)

Definition at line 767 of file Kernels.cuh.

◆ AddL2RegularizationGradients()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::AddL2RegularizationGradients ( AFloat *  A,
const AFloat *  B,
AFloat  weightDecay,
int  m,
int  n 
)

Definition at line 784 of file Kernels.cuh.

◆ AddRowWise()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::AddRowWise ( AFloat *  W,
const AFloat *  theta,
int  m,
int  n 
)

Definition at line 307 of file Kernels.cuh.

◆ AlmostEquals()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::AlmostEquals ( bool *  result,
const AFloat *  A,
const AFloat *  B,
double  epsilon,
int  m,
int  n 
)

Definition at line 951 of file Kernels.cuh.

◆ AtomicAdd() [1/3]

template<typename AFloat >
__device__ AFloat TMVA::DNN::Cuda::AtomicAdd ( AFloat *  address,
AFloat  val 
)

◆ AtomicAdd() [2/3]

template<>
__device__ double TMVA::DNN::Cuda::AtomicAdd ( double *  address,
double  val 
)

Definition at line 34 of file Kernels.cuh.
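On devices without a native double-precision atomicAdd (architectures older than sm_60), such a specialization is commonly implemented as a compare-and-swap loop. The sketch below shows that standard pattern from the CUDA programming guide; it illustrates the technique and is not necessarily the exact body used here.

// Standard CAS-based double-precision atomic add: repeatedly read the current
// value, add val, and attempt to swap the result in until no other thread
// has modified the location in between.
__device__ double AtomicAddSketch(double *address, double val)
{
   unsigned long long int *address_as_ull = (unsigned long long int *)address;
   unsigned long long int old = *address_as_ull, assumed;
   do {
      assumed = old;
      old = atomicCAS(address_as_ull, assumed,
                      __double_as_longlong(val + __longlong_as_double(assumed)));
   } while (assumed != old); // retry if another thread updated the value first
   return __longlong_as_double(old); // return the previous value, like atomicAdd
}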

◆ AtomicAdd() [3/3]

template<>
__device__ float TMVA::DNN::Cuda::AtomicAdd ( float *  address,
float  val 
)

Definition at line 48 of file Kernels.cuh.

◆ calculateDimension()

__device__ int TMVA::DNN::Cuda::calculateDimension ( int  imgDim,
int  fltDim,
int  padding,
int  stride 
)

Calculate the dimension of an output volume, given the sliding parameters and the input shape.

Parameters
[in] imgDim   The size of the input tensor in a spatial dimension.
[in] fltDim   The size of the sliding filter in the same dimension.
[in] padding  Number of zeroes to pad the input with.
[in] stride   Number of pixels the kernel slides in each iteration.
Returns
The output dimension.

Note that no checks are performed to assert the validity of the input parameters; they can be assumed valid because those checks are performed before the kernel is invoked.

Definition at line 226 of file Kernels.cuh.
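A minimal sketch of the arithmetic this helper performs, assuming the usual output-size formula for a filter sliding over a zero-padded input; the function name below is illustrative.

// Output size along one spatial dimension for a filter of size fltDim sliding
// with the given stride over an input of size imgDim, padded with `padding`
// zeroes on each side: (imgDim - fltDim + 2 * padding) / stride + 1.
__device__ int calculateDimensionSketch(int imgDim, int fltDim, int padding, int stride)
{
   return (imgDim - fltDim + 2 * padding) / stride + 1;
}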

◆ ConstAdd()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::ConstAdd ( AFloat *  A,
AFloat  beta,
int  m,
int  n 
)

Definition at line 335 of file Kernels.cuh.

◆ ConstMult()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::ConstMult ( AFloat *  A,
AFloat  beta,
int  m,
int  n 
)

Definition at line 349 of file Kernels.cuh.

◆ CrossEntropy()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::CrossEntropy ( AFloat *  result,
const AFloat *  Y,
const AFloat *  output,
const AFloat *  weights,
int  m,
int  n 
)

Definition at line 800 of file Kernels.cuh.

◆ CrossEntropyGradients()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::CrossEntropyGradients ( AFloat *  dY,
const AFloat *  Y,
const AFloat *  output,
const AFloat *  weights,
int  m,
int  n 
)

Definition at line 833 of file Kernels.cuh.

◆ Deflatten()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Deflatten ( AFloat *  A,
const AFloat *  B,
int  size,
int  nRows,
int  nCols 
)

Deflatten a 2D-array into an array of 2D-arrays.

Parameters
[out] A      Output array of 2D-arrays, each of which is column-major.
[in]  B      Input 2D-array to be split into size parts.
[in]  size   Number of 2D-arrays in the output.
[in]  nRows  Number of rows in each matrix of the output.
[in]  nCols  Number of columns in each matrix of the output.

A is a pointer to size raw TCudaMatrix pointers. Each of those will contain elements saved in column-major order; the concatenation itself, however, is performed row-wise. Each thread writes a single output element by locating the appropriate input index.

Definition at line 1227 of file Kernels.cuh.
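To make the index mapping concrete, a hedged sketch with one thread per output element, following the description above (A as an array of raw, column-major matrix pointers). The assumed layout of B, the AFloat** signature, and the launch geometry are illustrative assumptions, not the verified kernel.

// Sketch: thread (i, j) of grid slice k writes element (i, j) of output matrix k.
// Assumes B is a column-major (size x nRows*nCols) matrix whose row k holds the
// row-wise flattening of matrix k.
template <typename AFloat>
__global__ void DeflattenSketch(AFloat **A, const AFloat *B, int size, int nRows, int nCols)
{
   int j = blockDim.x * blockIdx.x + threadIdx.x; // column in the output matrix
   int i = blockDim.y * blockIdx.y + threadIdx.y; // row in the output matrix
   int k = blockIdx.z;                            // which output matrix
   if (i < nRows && j < nCols && k < size) {
      int flatCol = i * nCols + j;                 // row-wise concatenation
      A[k][j * nRows + i] = B[flatCol * size + k]; // both sides column-major
   }
}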

◆ DeflattenRM()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::DeflattenRM ( AFloat *  A,
const AFloat *  B,
int  size,
int  nRows,
int  nCols 
)

Definition at line 1245 of file Kernels.cuh.

◆ Downsample()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Downsample ( AFloat *  output,
AFloat *  indexMatrix,
const AFloat *  input,
int  depth,
int  imgHeight,
int  imgWidth,
int  fltHeight,
int  fltWidth,
int  strideRows,
int  strideCols 
)

Downsampling kernel used as the forward propagation step of a Max-Pooling layer.

Parameters
[out] output       The output matrix. Each row corresponds to a slice, and each element is the maximum within a receptive field.
[out] indexMatrix  The winning-indices matrix. Each element is the index of the maximum element.
[in]  input        The input matrix. Each row is a slice.
[in]  depth        The depth of the input tensor.
[in]  imgHeight    The height of the input.
[in]  imgWidth     The width of the input.
[in]  fltHeight    Height of the pooling kernel.
[in]  fltWidth     Width of the pooling kernel.
[in]  strideRows   Stride size in the horizontal dimension.
[in]  strideCols   Stride size in the vertical dimension.

Each output element is the maximum of its receptive field. The caller launches one thread per output element in order to eliminate shared write access.

Definition at line 1004 of file Kernels.cuh.
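A hedged, single-slice sketch of the scheme described above: one thread per pooled output element scans its receptive field, writes the maximum, and records the winning index for the backward pass. Padding, depth handling, and the row-major slice layout are simplifying assumptions, not the exact Kernels.cuh implementation.

// Max-pool forward step for one depth slice, one thread per output element.
template <typename AFloat>
__global__ void DownsampleSketch(AFloat *output, AFloat *indexMatrix, const AFloat *input,
                                 int imgHeight, int imgWidth, int fltHeight, int fltWidth,
                                 int strideRows, int strideCols)
{
   // Size of the pooled output, assuming no padding.
   int outW = (imgWidth - fltWidth) / strideCols + 1;
   int outH = (imgHeight - fltHeight) / strideRows + 1;
   int ox = blockDim.x * blockIdx.x + threadIdx.x; // output column
   int oy = blockDim.y * blockIdx.y + threadIdx.y; // output row
   if (ox >= outW || oy >= outH) return;

   // Scan the receptive field, tracking the maximum and its flat input index.
   int first = (oy * strideRows) * imgWidth + (ox * strideCols);
   AFloat best = input[first];
   int bestIdx = first;
   for (int fy = 0; fy < fltHeight; ++fy)
      for (int fx = 0; fx < fltWidth; ++fx) {
         int idx = (oy * strideRows + fy) * imgWidth + (ox * strideCols + fx);
         if (input[idx] > best) { best = input[idx]; bestIdx = idx; }
      }
   output[oy * outW + ox] = best;                              // max of the receptive field
   indexMatrix[oy * outW + ox] = static_cast<AFloat>(bestIdx); // winning index for the backward pass
}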

◆ Dropout()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Dropout ( AFloat *  A,
int  m,
int  n,
AFloat  dropoutProbability,
curandState_t *  state 
)

Definition at line 966 of file Kernels.cuh.

◆ Flatten()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Flatten ( AFloat *  A,
const AFloat *  B,
int  size,
int  nRows,
int  nCols 
)

Flatten an array of 2D-arrays into a single 2D-array.

Parameters
[out] A      Output 2D-array, saved in column-major order.
[in]  B      Input array of 2D-arrays. Each element is a matrix to be concatenated.
[in]  size   Number of 2D-arrays in the input.
[in]  nRows  Number of rows in each matrix of the input.
[in]  nCols  Number of columns in each matrix of the input.

B is a pointer to size raw TCudaMatrix pointers. Each of those contains elements saved in column-major order; the concatenation itself, however, is performed row-wise. Each thread writes a single output element by locating the appropriate input index.

Definition at line 1173 of file Kernels.cuh.
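For illustration, the inverse of the deflatten sketch shown earlier: one thread per element reads from a column-major input matrix and writes into the flat, column-major output with row-wise concatenation. Layout and signature details are again assumptions rather than the verified kernel body.

// Sketch: thread (i, j) of grid slice k copies element (i, j) of input matrix k
// into row k of the flat output A, which is assumed to be a column-major
// (size x nRows*nCols) matrix.
template <typename AFloat>
__global__ void FlattenSketch(AFloat *A, const AFloat *const *B, int size, int nRows, int nCols)
{
   int j = blockDim.x * blockIdx.x + threadIdx.x; // column in the input matrix
   int i = blockDim.y * blockIdx.y + threadIdx.y; // row in the input matrix
   int k = blockIdx.z;                            // which input matrix
   if (i < nRows && j < nCols && k < size) {
      int flatCol = i * nCols + j;                 // row-wise concatenation
      A[flatCol * size + k] = B[k][j * nRows + i]; // both sides column-major
   }
}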

◆ FlattenRM()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::FlattenRM ( AFloat *  A,
const AFloat *  B,
int  size,
int  nRows,
int  nCols 
)

Definition at line 1193 of file Kernels.cuh.

◆ Gauss()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Gauss ( AFloat *  A,
int  m,
int  n 
)

Definition at line 650 of file Kernels.cuh.

◆ GaussDerivative()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::GaussDerivative ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 665 of file Kernels.cuh.

◆ Hadamard()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Hadamard ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 321 of file Kernels.cuh.

◆ IdentityDerivative()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::IdentityDerivative ( AFloat *  A,
int  m,
int  n 
)

Definition at line 450 of file Kernels.cuh.

◆ Im2Col()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Im2Col ( AFloat *  A,
const AFloat *  B,
int  depth,
int  imgHeight,
int  imgWidth,
int  fltHeight,
int  fltWidth,
int  strideRows,
int  strideCols,
int  zeroPaddingHeight,
int  zeroPaddingWidth 
)

A kernel that re-arranges image regions of the input matrix B into column vectors in matrix A.

Parameters
[out] A                  The output matrix. Each row corresponds to a receptive field.
[in]  B                  The input matrix. Each row corresponds to a row in the image view.
[in]  depth              The depth of the input tensor.
[in]  imgHeight          The height of the input tensor.
[in]  imgWidth           The width of the input tensor.
[in]  fltHeight          Height of the filter.
[in]  fltWidth           Width of the filter.
[in]  strideRows         Stride size in the horizontal dimension.
[in]  strideCols         Stride size in the vertical dimension.
[in]  zeroPaddingHeight  The padding in the horizontal dimension.
[in]  zeroPaddingWidth   The padding in the vertical dimension.

The kernel should be invoked with one thread per output element. Note that matrices A and B have different shapes. Each thread in this kernel is responsible for filling one cell of the output matrix A; it does so by computing the correct element to copy from the input matrix B, so no blocking is ever needed. When reading this kernel it is important to keep in mind that TCudaMatrix objects are saved in column-major order for compatibility with cuBLAS.

Definition at line 256 of file Kernels.cuh.
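A hedged single-slice sketch of the mapping described above: each thread fills one cell of the column-major output A by locating the corresponding (possibly padded) pixel of B. Indexing, padding handling, and the row-major slice layout of B are illustrative assumptions, not the exact Kernels.cuh code.

// im2col for one depth slice: output element (row r, col c) of the
// column-major matrix A is the c-th pixel of the r-th receptive field of B.
// Out-of-image (padded) positions are written as zero.
template <typename AFloat>
__global__ void Im2ColSketch(AFloat *A, const AFloat *B,
                             int imgHeight, int imgWidth, int fltHeight, int fltWidth,
                             int strideRows, int strideCols, int padHeight, int padWidth)
{
   int outW = (imgWidth - fltWidth + 2 * padWidth) / strideCols + 1;
   int outH = (imgHeight - fltHeight + 2 * padHeight) / strideRows + 1;
   int nFields = outW * outH;            // number of receptive fields = rows of A
   int fieldSize = fltHeight * fltWidth; // pixels per field = columns of A

   int r = blockDim.y * blockIdx.y + threadIdx.y; // receptive field index
   int c = blockDim.x * blockIdx.x + threadIdx.x; // position inside the field
   if (r >= nFields || c >= fieldSize) return;

   int y = (r / outW) * strideRows - padHeight + c / fltWidth; // image row
   int x = (r % outW) * strideCols - padWidth + c % fltWidth;  // image column
   AFloat value = 0;
   if (y >= 0 && y < imgHeight && x >= 0 && x < imgWidth)
      value = B[y * imgWidth + x];
   A[c * nFields + r] = value; // A is column-major: element (r, c)
}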

◆ max()

template<typename AFloat >
__device__ AFloat TMVA::DNN::Cuda::max ( AFloat  x,
AFloat  y 
)

Definition at line 207 of file Kernels.cuh.

◆ MaxPoolBackward()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::MaxPoolBackward ( AFloat *  activationGradientsBackward,
const AFloat *  activationGradients,
const AFloat *  indexMatrix,
int  depth,
int  imgHeight,
int  imgWidth,
int  fltHeight,
int  fltWidth,
int  strideRows,
int  strideCols 
)

Back-propagate the gradients through a max-pooling layer.

Parameters
[out] activationGradientsBackward  The gradients to be written. One gradient for each neuron at the layer's input.
[in]  activationGradients          The gradients coming from the next layer. One gradient for each receptive field.
[in]  indexMatrix                  Winning indices. One index for each receptive field.
[in]  depth                        The depth of the input tensor.
[in]  imgHeight                    The height of the input tensor.
[in]  imgWidth                     The width of the input tensor.
[in]  fltHeight                    Height of the filter.
[in]  fltWidth                     Width of the filter.
[in]  strideRows                   Stride size in the horizontal dimension.
[in]  strideCols                   Stride size in the vertical dimension.

Definition at line 1061 of file Kernels.cuh.
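Conceptually, the backward pass scatters each incoming gradient onto the input neuron recorded in indexMatrix during the forward pass. A hedged single-slice sketch follows; an atomic add is used because overlapping receptive fields may share a winner (the real kernels can rely on the AtomicAdd helper documented above). Names and layout are assumptions, not the verified implementation.

// Max-pool backprop for one depth slice: each thread takes one gradient from
// the next layer and adds it to the input element that won the corresponding
// receptive field, whose flat index was stored by the Downsample kernel.
template <typename AFloat>
__global__ void MaxPoolBackwardSketch(AFloat *activationGradientsBackward,
                                      const AFloat *activationGradients,
                                      const AFloat *indexMatrix, int nFields)
{
   int r = blockDim.x * blockIdx.x + threadIdx.x; // receptive field index
   if (r < nFields) {
      int winner = static_cast<int>(indexMatrix[r]); // winning input index
      atomicAdd(&activationGradientsBackward[winner], activationGradients[r]);
   }
}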

◆ MeanSquaredError()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::MeanSquaredError ( AFloat *  result,
const AFloat *  Y,
const AFloat *  output,
const AFloat *  weights,
int  m,
int  n 
)

Definition at line 681 of file Kernels.cuh.

◆ MeanSquaredErrorGradients()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::MeanSquaredErrorGradients ( AFloat *  dY,
const AFloat *  Y,
const AFloat *  output,
const AFloat *  weights,
int  m,
int  n 
)

Definition at line 750 of file Kernels.cuh.

◆ ReciprocalElementWise()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::ReciprocalElementWise ( AFloat *  A,
int  m,
int  n 
)

Definition at line 363 of file Kernels.cuh.

◆ ReduceMatrix()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::ReduceMatrix ( AFloat *  result,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 909 of file Kernels.cuh.

◆ ReduceSum()

template<typename AFloat >
__device__ void TMVA::DNN::Cuda::ReduceSum ( AFloat *  result,
AFloat *  sdata 
)

Definition at line 134 of file Kernels.cuh.

◆ ReduceSumVertical()

template<typename AFloat >
__device__ void TMVA::DNN::Cuda::ReduceSumVertical ( AFloat *  result,
AFloat *  sdata,
int  n 
)

Definition at line 55 of file Kernels.cuh.

◆ Relu()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Relu ( AFloat *  A,
int  m,
int  n 
)

Definition at line 463 of file Kernels.cuh.

◆ ReluDerivative()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::ReluDerivative ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 478 of file Kernels.cuh.

◆ Reshape()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Reshape ( AFloat *  A,
const AFloat *  B,
int  nRowsA,
int  nColsA,
int  nRowsB,
int  nColsB 
)

Definition at line 1144 of file Kernels.cuh.

◆ RotateWeights()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::RotateWeights ( AFloat *  A,
const AFloat *  B,
int  filterDepth,
int  filterHeight,
int  filterWidth,
int  numFilters 
)

Definition at line 1104 of file Kernels.cuh.

◆ Sigmoid() [1/2]

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Sigmoid ( AFloat *  A,
int  m,
int  n 
)

Definition at line 493 of file Kernels.cuh.

◆ Sigmoid() [2/2]

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Sigmoid ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 508 of file Kernels.cuh.

◆ SigmoidDerivative()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SigmoidDerivative ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 524 of file Kernels.cuh.

◆ Softmax()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Softmax ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 540 of file Kernels.cuh.

◆ SoftmaxCrossEntropy()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SoftmaxCrossEntropy ( AFloat *  result,
const AFloat *  Y,
const AFloat *  output,
const AFloat *  weights,
int  m,
int  n 
)

Definition at line 853 of file Kernels.cuh.

◆ SoftmaxCrossEntropyGradients()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SoftmaxCrossEntropyGradients ( AFloat *  dY,
const AFloat *  Y,
const AFloat *  output,
const AFloat *  weights,
int  m,
int  n 
)

Definition at line 884 of file Kernels.cuh.

◆ SoftSign()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SoftSign ( AFloat *  A,
int  m,
int  n 
)

Definition at line 619 of file Kernels.cuh.

◆ SoftSignDerivative()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SoftSignDerivative ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 634 of file Kernels.cuh.

◆ SqrtElementWise()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SqrtElementWise ( AFloat *  A,
int  m,
int  n 
)

Definition at line 391 of file Kernels.cuh.

◆ SquaredSum()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SquaredSum ( AFloat *  result,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 707 of file Kernels.cuh.

◆ SquareElementWise()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SquareElementWise ( AFloat *  A,
int  m,
int  n 
)

Definition at line 377 of file Kernels.cuh.

◆ SumColumns()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SumColumns ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 929 of file Kernels.cuh.

◆ SymmetricRelu()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SymmetricRelu ( AFloat *  A,
int  m,
int  n 
)

Definition at line 590 of file Kernels.cuh.

◆ SymmetricReluDerivative()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::SymmetricReluDerivative ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 604 of file Kernels.cuh.

◆ Tanh()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::Tanh ( AFloat *  A,
int  m,
int  n 
)

Definition at line 559 of file Kernels.cuh.

◆ TanhDerivative()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::TanhDerivative ( AFloat *  B,
const AFloat *  A,
int  m,
int  n 
)

Definition at line 574 of file Kernels.cuh.

◆ UpdateWeights()

template<typename AFloat >
__global__ void TMVA::DNN::Cuda::UpdateWeights ( AFloat *  A,
const AFloat **  B,
int  batchSize,
int  nRows,
int  nCols 
)

Definition at line 1130 of file Kernels.cuh.