Functions
template<typename AFloat >
__global__ void	AbsoluteSum (AFloat *result, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	AdamUpdate (AFloat *A, const AFloat *M, const AFloat *V, int m, int n, AFloat alpha, AFloat eps)
	optimizer kernel functions

template<typename AFloat >
__global__ void	AdamUpdateFirstMom (AFloat *A, const AFloat *B, int m, int n, AFloat beta)

template<typename AFloat >
__global__ void	AdamUpdateSecondMom (AFloat *A, const AFloat *B, int m, int n, AFloat beta)

template<typename AFloat >
__global__ void	AddBiases (AFloat *A, const AFloat *B, int nRows, int nCols)

template<typename AFloat >
__global__ void	AddL1RegularizationGradients (AFloat *A, const AFloat *B, AFloat weightDecay, int m, int n)

template<typename AFloat >
__global__ void	AddL2RegularizationGradients (AFloat *A, const AFloat *B, AFloat weightDecay, int m, int n)

template<typename AFloat >
__global__ void	AddRowWise (AFloat *W, const AFloat *theta, int m, int n)

template<typename AFloat >
__global__ void	AlmostEquals (bool *result, const AFloat *A, const AFloat *B, double epsilon, int m, int n)

template<typename AFloat >
__device__ AFloat	AtomicAdd (AFloat *address, AFloat val)

template<>
__device__ double	AtomicAdd (double *address, double val)

template<>
__device__ float	AtomicAdd (float *address, float val)

__device__ int	calculateDimension (int imgDim, int fltDim, int padding, int stride)
	Calculate the dimension of an output volume, given the sliding parameters and the input shape.

template<typename AFloat >
__global__ void	ConstAdd (AFloat *A, AFloat beta, int m, int n)

template<typename AFloat >
__global__ void	ConstMult (AFloat *A, AFloat beta, int m, int n)

template<typename AFloat >
__global__ void	CrossEntropy (AFloat *result, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)

template<typename AFloat >
__global__ void	CrossEntropyGradients (AFloat *dY, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)

template<typename AFloat >
__global__ void	Deflatten (AFloat *A, const AFloat *B, int size, int nRows, int nCols)
	Deflatten a 2D-array into an array of 2D-arrays.

template<typename AFloat >
__global__ void	DeflattenRM (AFloat *A, const AFloat *B, int size, int nRows, int nCols)

template<typename AFloat >
__global__ void	Downsample (AFloat *output, AFloat *indexMatrix, const AFloat *input, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols)
	Downsampling kernel used as the forward propagation step of a Max-Pooling layer.

template<typename AFloat >
__global__ void	Dropout (AFloat *A, int m, int n, AFloat dropoutProbability, curandState_t *state)

template<typename AFloat >
__global__ void	Flatten (AFloat *A, const AFloat *B, int size, int nRows, int nCols)
	Flatten an array of 2D-arrays into a single 2D-array.

template<typename AFloat >
__global__ void	FlattenRM (AFloat *A, const AFloat *B, int size, int nRows, int nCols)

template<typename AFloat >
__global__ void	Gauss (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	GaussDerivative (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	Hadamard (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	IdentityDerivative (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	Im2Col (AFloat *A, const AFloat *B, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols, int zeroPaddingHeight, int zeroPaddingWidth)
	A kernel that re-arranges image regions of the input matrix B into column vectors in matrix A.

template<typename AFloat >
__device__ AFloat	max (AFloat x, AFloat y)

template<typename AFloat >
__global__ void	MaxPoolBackward (AFloat *activationGradientsBackward, const AFloat *activationGradients, const AFloat *indexMatrix, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols)
	Back-propagate the gradients through a max-pooling layer.

template<typename AFloat >
__global__ void	MeanSquaredError (AFloat *result, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)

template<typename AFloat >
__global__ void	MeanSquaredErrorGradients (AFloat *dY, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)

template<typename AFloat >
__global__ void	ReciprocalElementWise (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	ReduceMatrix (AFloat *result, const AFloat *A, int m, int n)

template<typename AFloat >
__device__ void	ReduceSum (AFloat *result, AFloat *sdata)

template<typename AFloat >
__device__ void	ReduceSumVertical (AFloat *result, AFloat *sdata, int n)

template<typename AFloat >
__global__ void	Relu (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	ReluDerivative (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	Reshape (AFloat *A, const AFloat *B, int nRowsA, int nColsA, int nRowsB, int nColsB)

template<typename AFloat >
__global__ void	RotateWeights (AFloat *A, const AFloat *B, int filterDepth, int filterHeight, int filterWidth, int numFilters)

template<typename AFloat >
__global__ void	Sigmoid (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	Sigmoid (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	SigmoidDerivative (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	Softmax (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	SoftmaxCrossEntropy (AFloat *result, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)

template<typename AFloat >
__global__ void	SoftmaxCrossEntropyGradients (AFloat *dY, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n)

template<typename AFloat >
__global__ void	SoftSign (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	SoftSignDerivative (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	SqrtElementWise (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	SquaredSum (AFloat *result, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	SquareElementWise (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	SumColumns (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	SymmetricRelu (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	SymmetricReluDerivative (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	Tanh (AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	TanhDerivative (AFloat *B, const AFloat *A, int m, int n)

template<typename AFloat >
__global__ void	UpdateWeights (AFloat *A, const AFloat **B, int batchSize, int nRows, int nCols)

Function Documentation

◆ AbsoluteSum()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::AbsoluteSum	(	AFloat *	result,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 729 of file Kernels.cuh.

◆ AdamUpdate()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::AdamUpdate	(	AFloat *	A,
		const AFloat *	M,
		const AFloat *	V,
		int	m,
		int	n,
		AFloat	alpha,
		AFloat	eps
	)

optimizer kernel functions

Definition at line 408 of file Kernels.cuh.

◆ AdamUpdateFirstMom()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::AdamUpdateFirstMom	(	AFloat *	A,
		const AFloat *	B,
		int	m,
		int	n,
		AFloat	beta
	)

Definition at line 422 of file Kernels.cuh.

◆ AdamUpdateSecondMom()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::AdamUpdateSecondMom	(	AFloat *	A,
		const AFloat *	B,
		int	m,
		int	n,
		AFloat	beta
	)

Definition at line 436 of file Kernels.cuh.

◆ AddBiases()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::AddBiases	(	AFloat *	A,
		const AFloat *	B,
		int	nRows,
		int	nCols
	)

Definition at line 1120 of file Kernels.cuh.

◆ AddL1RegularizationGradients()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::AddL1RegularizationGradients	(	AFloat *	A,
		const AFloat *	B,
		AFloat	weightDecay,
		int	m,
		int	n
	)

Definition at line 767 of file Kernels.cuh.

◆ AddL2RegularizationGradients()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::AddL2RegularizationGradients	(	AFloat *	A,
		const AFloat *	B,
		AFloat	weightDecay,
		int	m,
		int	n
	)

Definition at line 784 of file Kernels.cuh.

◆ AddRowWise()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::AddRowWise	(	AFloat *	W,
		const AFloat *	theta,
		int	m,
		int	n
	)

Definition at line 307 of file Kernels.cuh.

◆ AlmostEquals()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::AlmostEquals	(	bool *	result,
		const AFloat *	A,
		const AFloat *	B,
		double	epsilon,
		int	m,
		int	n
	)

Definition at line 951 of file Kernels.cuh.

◆ AtomicAdd() [1/3]

template<typename AFloat >

__device__ AFloat TMVA::DNN::Cuda::AtomicAdd	(	AFloat *	address,
		AFloat	val
	)

◆ AtomicAdd() [2/3]

template<>

__device__ double TMVA::DNN::Cuda::AtomicAdd	(	double *	address,
		double	val
	)

Definition at line 34 of file Kernels.cuh.

◆ AtomicAdd() [3/3]

template<>

__device__ float TMVA::DNN::Cuda::AtomicAdd	(	float *	address,
		float	val
	)

Definition at line 48 of file Kernels.cuh.

◆ calculateDimension()

__device__ int TMVA::DNN::Cuda::calculateDimension	(	int	imgDim,
		int	fltDim,
		int	padding,
		int	stride
	)

Calculate the dimension of an output volume, given the sliding parameters and the input shape.

Parameters

[in]	imgDim	The size of the input tensor in a spatial dimension.
[in]	fltDim	The size of the sliding filter in the same dimension.
[in]	padding	Number of zeroes to pad the input with.
[in]	stride	Number of pixels the kernel is sliding in each iteration.

Returns: The output dimension.

Note that no checks are performed to assert validity of the input parameters. We are allowed to assume them valid because those checks have already been performed prior to the invocation of the kernel.

Definition at line 226 of file Kernels.cuh.

◆ ConstAdd()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::ConstAdd	(	AFloat *	A,
		AFloat	beta,
		int	m,
		int	n
	)

Definition at line 335 of file Kernels.cuh.

◆ ConstMult()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::ConstMult	(	AFloat *	A,
		AFloat	beta,
		int	m,
		int	n
	)

Definition at line 349 of file Kernels.cuh.

◆ CrossEntropy()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::CrossEntropy	(	AFloat *	result,
		const AFloat *	Y,
		const AFloat *	output,
		const AFloat *	weights,
		int	m,
		int	n
	)

Definition at line 800 of file Kernels.cuh.

◆ CrossEntropyGradients()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::CrossEntropyGradients	(	AFloat *	dY,
		const AFloat *	Y,
		const AFloat *	output,
		const AFloat *	weights,
		int	m,
		int	n
	)

Definition at line 833 of file Kernels.cuh.

◆ Deflatten()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Deflatten	(	AFloat *	A,
		const AFloat *	B,
		int	size,
		int	nRows,
		int	nCols
	)

Deflatten a 2D-array into an array of 2D-arrays.

Parameters

[out]	A	Output array of 2D-arrays, each of which is column-major.
[in]	B	Input 2D-array to be split into `size` parts.
[in]	size	Number of 2D-arrays in the output.
[in]	nRows	Number of rows in each matrix of the output.
[in]	nCols	Number of columns in each matrix of the output.

A is a pointer to `size` raw TCudaMatrix pointers. Each of those will contain elements stored in column-major order. However, the concatenation is performed row-wise. Each thread writes a single output element by locating the appropriate input index.

Definition at line 1227 of file Kernels.cuh.

◆ DeflattenRM()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::DeflattenRM	(	AFloat *	A,
		const AFloat *	B,
		int	size,
		int	nRows,
		int	nCols
	)

Definition at line 1245 of file Kernels.cuh.

◆ Downsample()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Downsample	(	AFloat *	output,
		AFloat *	indexMatrix,
		const AFloat *	input,
		int	depth,
		int	imgHeight,
		int	imgWidth,
		int	fltHeight,
		int	fltWidth,
		int	strideRows,
		int	strideCols
	)

Downsampling kernel used as the forward propagation step of a Max-Pooling layer.

Parameters

[out]	output	The output matrix. Each row corresponds to a slice and each element is the max within a receptive field.
[out]	indexMatrix	The winning indices matrix. Each element is the index of the max element.
[in]	input	The input matrix. Each row is a slice.
[in]	depth	The depth of the input tensor.
[in]	imgHeight	The height of the input.
[in]	imgWidth	The width of the input.
[in]	fltHeight	Height of the kernel.
[in]	fltWidth	Width of the kernel.
[in]	strideRows	stride size in the horizontal dimension.
[in]	strideCols	stride size in the vertical dimension.

Each output element is the maximum of the receptive field. The caller launches one thread per output element in order to eliminate shared write access.

Definition at line 1004 of file Kernels.cuh.

◆ Dropout()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Dropout	(	AFloat *	A,
		int	m,
		int	n,
		AFloat	dropoutProbability,
		curandState_t *	state
	)

Definition at line 966 of file Kernels.cuh.

◆ Flatten()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Flatten	(	AFloat *	A,
		const AFloat *	B,
		int	size,
		int	nRows,
		int	nCols
	)

Flatten an array of 2D-arrays into a single 2D-array.

Parameters

[out]	A	Output 2D-array saved in column major order.
[in]	B	Input array of 2D-arrays. Each element is a matrix to be concatenated.
[in]	size	Number of 2D-arrays in the input.
[in]	nRows	Number of rows in each matrix of the input.
[in]	nCols	Number of columns in each matrix of the input.

B is a pointer to `size` raw TCudaMatrix pointers. Each of those contains elements stored in column-major order. However, the concatenation is performed row-wise. Each thread writes a single output element by locating the appropriate input index.

Definition at line 1173 of file Kernels.cuh.

◆ FlattenRM()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::FlattenRM	(	AFloat *	A,
		const AFloat *	B,
		int	size,
		int	nRows,
		int	nCols
	)

Definition at line 1193 of file Kernels.cuh.

◆ Gauss()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Gauss	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 650 of file Kernels.cuh.

◆ GaussDerivative()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::GaussDerivative	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 665 of file Kernels.cuh.

◆ Hadamard()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Hadamard	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 321 of file Kernels.cuh.

◆ IdentityDerivative()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::IdentityDerivative	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 450 of file Kernels.cuh.

◆ Im2Col()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Im2Col	(	AFloat *	A,
		const AFloat *	B,
		int	depth,
		int	imgHeight,
		int	imgWidth,
		int	fltHeight,
		int	fltWidth,
		int	strideRows,
		int	strideCols,
		int	zeroPaddingHeight,
		int	zeroPaddingWidth
	)

A kernel that re-arranges image regions of the input matrix B into column vectors in matrix A.

Parameters

[out]	A	The output matrix. Each row corresponds to a receptive field.
[in]	B	The input matrix. Each row corresponds to a row in the image view.
[in]	depth	The depth of the input tensor.
[in]	imgHeight	The height of the input tensor.
[in]	imgWidth	The width of the input tensor.
[in]	fltHeight	Height of the filter.
[in]	fltWidth	Width of the filter.
[in]	strideRows	stride size in the horizontal dimension.
[in]	strideCols	stride size in the vertical dimension.
[in]	zeroPaddingHeight	The padding in the horizontal dimension.
[in]	zeroPaddingWidth	The padding in the vertical dimension.

The kernel should be invoked with one thread per output element. Note that matrices A and B have different shapes. Each thread in this kernel is responsible for filling one cell of the output matrix A. It does so by computing the correct element to copy from the input matrix B. We therefore never need to block. When reading this kernel, it is important to keep in mind that TCudaMatrix objects are stored in column-major order for compatibility with cuBLAS.

Definition at line 256 of file Kernels.cuh.

◆ max()

template<typename AFloat >

__device__ AFloat TMVA::DNN::Cuda::max	(	AFloat	x,
		AFloat	y
	)

Definition at line 207 of file Kernels.cuh.

◆ MaxPoolBackward()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::MaxPoolBackward	(	AFloat *	activationGradientsBackward,
		const AFloat *	activationGradients,
		const AFloat *	indexMatrix,
		int	depth,
		int	imgHeight,
		int	imgWidth,
		int	fltHeight,
		int	fltWidth,
		int	strideRows,
		int	strideCols
	)

Back-propagate the gradients through a max-pooling layer.

Parameters

[out]	activationGradientsBackward	The gradients to be written. One gradient for each neuron at the layer's input.
[in]	activationGradients	The gradients coming from the next layer. One gradient for each receptive field.
[in]	indexMatrix	Winning indices. One index for each receptive field.
[in]	depth	The depth of the input tensor.
[in]	imgHeight	The height of the input tensor.
[in]	imgWidth	The width of the input tensor.
[in]	fltHeight	Height of the filter.
[in]	fltWidth	Width of the filter.
[in]	strideRows	stride size in the horizontal dimension.
[in]	strideCols	stride size in the vertical dimension.

Definition at line 1061 of file Kernels.cuh.

◆ MeanSquaredError()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::MeanSquaredError	(	AFloat *	result,
		const AFloat *	Y,
		const AFloat *	output,
		const AFloat *	weights,
		int	m,
		int	n
	)

Definition at line 681 of file Kernels.cuh.

◆ MeanSquaredErrorGradients()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::MeanSquaredErrorGradients	(	AFloat *	dY,
		const AFloat *	Y,
		const AFloat *	output,
		const AFloat *	weights,
		int	m,
		int	n
	)

Definition at line 750 of file Kernels.cuh.

◆ ReciprocalElementWise()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::ReciprocalElementWise	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 363 of file Kernels.cuh.

◆ ReduceMatrix()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::ReduceMatrix	(	AFloat *	result,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 909 of file Kernels.cuh.

◆ ReduceSum()

template<typename AFloat >

__device__ void TMVA::DNN::Cuda::ReduceSum	(	AFloat *	result,
		AFloat *	sdata
	)

Definition at line 134 of file Kernels.cuh.

◆ ReduceSumVertical()

template<typename AFloat >

__device__ void TMVA::DNN::Cuda::ReduceSumVertical	(	AFloat *	result,
		AFloat *	sdata,
		int	n
	)

Definition at line 55 of file Kernels.cuh.

◆ Relu()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Relu	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 463 of file Kernels.cuh.

◆ ReluDerivative()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::ReluDerivative	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 478 of file Kernels.cuh.

◆ Reshape()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Reshape	(	AFloat *	A,
		const AFloat *	B,
		int	nRowsA,
		int	nColsA,
		int	nRowsB,
		int	nColsB
	)

Definition at line 1144 of file Kernels.cuh.

◆ RotateWeights()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::RotateWeights	(	AFloat *	A,
		const AFloat *	B,
		int	filterDepth,
		int	filterHeight,
		int	filterWidth,
		int	numFilters
	)

Definition at line 1104 of file Kernels.cuh.

◆ Sigmoid() [1/2]

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Sigmoid	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 493 of file Kernels.cuh.

◆ Sigmoid() [2/2]

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Sigmoid	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 508 of file Kernels.cuh.

◆ SigmoidDerivative()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SigmoidDerivative	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 524 of file Kernels.cuh.

◆ Softmax()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Softmax	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 540 of file Kernels.cuh.

◆ SoftmaxCrossEntropy()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SoftmaxCrossEntropy	(	AFloat *	result,
		const AFloat *	Y,
		const AFloat *	output,
		const AFloat *	weights,
		int	m,
		int	n
	)

Definition at line 853 of file Kernels.cuh.

◆ SoftmaxCrossEntropyGradients()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SoftmaxCrossEntropyGradients	(	AFloat *	dY,
		const AFloat *	Y,
		const AFloat *	output,
		const AFloat *	weights,
		int	m,
		int	n
	)

Definition at line 884 of file Kernels.cuh.

◆ SoftSign()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SoftSign	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 619 of file Kernels.cuh.

◆ SoftSignDerivative()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SoftSignDerivative	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 634 of file Kernels.cuh.

◆ SqrtElementWise()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SqrtElementWise	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 391 of file Kernels.cuh.

◆ SquaredSum()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SquaredSum	(	AFloat *	result,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 707 of file Kernels.cuh.

◆ SquareElementWise()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SquareElementWise	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 377 of file Kernels.cuh.

◆ SumColumns()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SumColumns	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 929 of file Kernels.cuh.

◆ SymmetricRelu()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SymmetricRelu	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 590 of file Kernels.cuh.

◆ SymmetricReluDerivative()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::SymmetricReluDerivative	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 604 of file Kernels.cuh.

◆ Tanh()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::Tanh	(	AFloat *	A,
		int	m,
		int	n
	)

Definition at line 559 of file Kernels.cuh.

◆ TanhDerivative()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::TanhDerivative	(	AFloat *	B,
		const AFloat *	A,
		int	m,
		int	n
	)

Definition at line 574 of file Kernels.cuh.

◆ UpdateWeights()

template<typename AFloat >

__global__ void TMVA::DNN::Cuda::UpdateWeights	(	AFloat *	A,
		const AFloat **	B,
		int	batchSize,
		int	nRows,
		int	nCols
	)

Definition at line 1130 of file Kernels.cuh.

Functions

Function Documentation

◆ AbsoluteSum()

◆ AdamUpdate()

◆ AdamUpdateFirstMom()

◆ AdamUpdateSecondMom()

◆ AddBiases()

◆ AddL1RegularizationGradients()

◆ AddL2RegularizationGradients()

◆ AddRowWise()

◆ AlmostEquals()

◆ AtomicAdd() [1/3]

◆ AtomicAdd() [2/3]

◆ AtomicAdd() [3/3]

◆ calculateDimension()

◆ ConstAdd()

◆ ConstMult()

◆ CrossEntropy()

◆ CrossEntropyGradients()

◆ Deflatten()

◆ DeflattenRM()

◆ Downsample()

◆ Dropout()

◆ Flatten()

◆ FlattenRM()

◆ Gauss()

◆ GaussDerivative()

◆ Hadamard()

◆ IdentityDerivative()

◆ Im2Col()

◆ max()

◆ MaxPoolBackward()

◆ MeanSquaredError()

◆ MeanSquaredErrorGradients()

◆ ReciprocalElementWise()

◆ ReduceMatrix()

◆ ReduceSum()

◆ ReduceSumVertical()

◆ Relu()

◆ ReluDerivative()

◆ Reshape()

◆ RotateWeights()

◆ Sigmoid() [1/2]

◆ Sigmoid() [2/2]

◆ SigmoidDerivative()

◆ Softmax()

◆ SoftmaxCrossEntropy()

◆ SoftmaxCrossEntropyGradients()

◆ SoftSign()

◆ SoftSignDerivative()

◆ SqrtElementWise()

◆ SquaredSum()

◆ SquareElementWise()

◆ SumColumns()

◆ SymmetricRelu()

◆ SymmetricReluDerivative()

◆ Tanh()

◆ TanhDerivative()

◆ UpdateWeights()