template<typename AFloat>
class TMVA::DNN::TCudaTensor< AFloat >

The TCudaTensor class extends the TCudaMatrix class for dimensions > 2.

Definition at line 83 of file CudaTensor.h.

Classes
struct	TensorDescriptor

Public Types
using	MemoryLayout = TMVA::Experimental::MemoryLayout

using	Scalar_t = AFloat

using	Shape_t = std::vector< size_t >

Public Member Functions
	TCudaTensor ()

	TCudaTensor (const AFloat *data, const std::vector< size_t > &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)

	TCudaTensor (const std::vector< size_t > &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)

	TCudaTensor (const TCudaMatrix< AFloat > &m, size_t dim=2)

	TCudaTensor (const TCudaTensor &)=default

	TCudaTensor (const TMatrixT< AFloat > &m, size_t dim=2)

	TCudaTensor (size_t bsize, size_t csize, size_t hsize, size_t wsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)

	TCudaTensor (size_t bsize, size_t csize, size_t hwsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)

	TCudaTensor (size_t n, size_t m, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)

	TCudaTensor (TCudaDeviceBuffer< AFloat > buffer, const std::vector< size_t > &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)

	TCudaTensor (TCudaDeviceBuffer< AFloat > buffer, size_t n, size_t m)

	TCudaTensor (TCudaTensor &&)=default

	~TCudaTensor ()

TCudaTensor< AFloat >	At (size_t i) const

cudaStream_t	GetComputeStream () const

size_t	GetCSize () const

AFloat *	GetData ()

const AFloat *	GetData () const

AFloat *	GetDataPointer ()

const AFloat *	GetDataPointer () const

AFloat *	GetDataPointerAt (size_t i)

const AFloat *	GetDataPointerAt (size_t i) const

TCudaDeviceBuffer< AFloat > &	GetDeviceBuffer ()

const TCudaDeviceBuffer< AFloat > &	GetDeviceBuffer () const

size_t	GetDimAt (size_t i) const

size_t	GetFirstSize () const

size_t	GetFirstStride () const

size_t	GetHSize () const

MemoryLayout	GetLayout () const

TCudaMatrix< AFloat >	GetMatrix () const

size_t	GetNcols () const

size_t	GetNDim () const

size_t	GetNrows () const

const Shape_t &	GetShape () const

size_t	GetSize () const

const Shape_t &	GetStrides () const

size_t	GetWSize () const

bool	isEqual (const AFloat *hostBufferOther, size_t otherSize)

bool	isEqual (TCudaTensor< AFloat > &other)

	operator TMatrixT () const
	Convert cuda matrix to Root TMatrix.

TCudaDeviceReference< AFloat >	operator() (size_t i, size_t j) const

TCudaDeviceReference< AFloat >	operator() (size_t i, size_t j, size_t k) const

TCudaDeviceReference< AFloat >	operator() (size_t i, size_t j, size_t k, size_t l) const

TCudaTensor &	operator= (const TCudaTensor &)=default

TCudaTensor &	operator= (TCudaTensor &&)=default

TCudaMatrix< AFloat >	operator[] (size_t i) const

void	Print (const char *name="Tensor", bool truncate=false) const

void	PrintShape (const char *name="Tensor") const

TCudaTensor< AFloat >	Reshape (const Shape_t &newShape) const

void	ReshapeInPlace (const Shape_t &newShape)

void	SetComputeStream (cudaStream_t stream)

void	SetConstVal (const AFloat constVal)

void	SetTensorDescriptor ()

void	Zero ()

Static Public Member Functions
static std::vector< std::size_t >	ComputeStridesFromShape (const std::vector< std::size_t > &shape, bool rowmajorLayout)

Private Member Functions
void	InitializeCuda ()
	Initializes all shared device resources and makes sure that a sufficient number of curand states are allocated and initialized on the device, and that the one-vector used for the summation over columns has the right size.

void	InitializeCurandStates ()

Private Attributes
int	fDevice
	Device associated with current tensor instance.

TCudaDeviceBuffer< AFloat >	fElementBuffer

MemoryLayout	fMemoryLayout

size_t	fNDim
	Dimension of the tensor (first dimension is the batch size, second is the no. of channels)

Shape_t	fShape
	The shape vector (size of dimensions) needs to be ordered as: no. of channels, image dimensions, spatial subdimensions.

size_t	fSize
	No. of elements.

int	fStreamIndx
	Cuda stream associated with current instance.

Shape_t	fStrides
	Strides between tensor dimensions (always assume dense, non overlapping tensor)

std::shared_ptr< TensorDescriptor >	fTensorDescriptor

Static Private Attributes
static std::vector< int >	fInstances
	For each GPU device keep the CUDA streams in which tensors are used.

#include <TMVA/DNN/Architectures/Cuda/CudaTensor.h>

Member Typedef Documentation

◆ MemoryLayout

template<typename AFloat >

using TMVA::DNN::TCudaTensor< AFloat >::MemoryLayout = TMVA::Experimental::MemoryLayout

Definition at line 88 of file CudaTensor.h.

◆ Scalar_t

template<typename AFloat >

using TMVA::DNN::TCudaTensor< AFloat >::Scalar_t = AFloat

Definition at line 89 of file CudaTensor.h.

◆ Shape_t

template<typename AFloat >

using TMVA::DNN::TCudaTensor< AFloat >::Shape_t = std::vector<size_t>

Definition at line 87 of file CudaTensor.h.

Constructor & Destructor Documentation

◆ TCudaTensor() [1/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( )

◆ TCudaTensor() [2/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor	(	const AFloat *	data,
		const std::vector< size_t > &	shape,
		MemoryLayout	memlayout = `MemoryLayout::ColumnMajor`,
		int	deviceIndx = `0`,
		int	streamIndx = `0`
	)

◆ TCudaTensor() [3/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor	(	TCudaDeviceBuffer< AFloat >	buffer,
		const std::vector< size_t > &	shape,
		MemoryLayout	memlayout = `MemoryLayout::ColumnMajor`,
		int	deviceIndx = `0`,
		int	streamIndx = `0`
	)

◆ TCudaTensor() [4/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor	(	const std::vector< size_t > &	shape,
		MemoryLayout	memlayout = `MemoryLayout::ColumnMajor`,
		int	deviceIndx = `0`,
		int	streamIndx = `0`
	)

◆ TCudaTensor() [5/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor	(	size_t	bsize,
		size_t	csize,
		size_t	hwsize,
		MemoryLayout	memlayout = `MemoryLayout::ColumnMajor`,
		int	deviceIndx = `0`,
		int	streamIndx = `0`
	)

inline

Definition at line 149 of file CudaTensor.h.

◆ TCudaTensor() [6/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor	(	size_t	bsize,
		size_t	csize,
		size_t	hsize,
		size_t	wsize,
		MemoryLayout	memlayout = `MemoryLayout::ColumnMajor`,
		int	deviceIndx = `0`,
		int	streamIndx = `0`
	)

inline

Definition at line 154 of file CudaTensor.h.

◆ TCudaTensor() [7/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor	(	size_t	n,
		size_t	m,
		MemoryLayout	memlayout = `MemoryLayout::ColumnMajor`,
		int	deviceIndx = `0`,
		int	streamIndx = `0`
	)

inline

Definition at line 162 of file CudaTensor.h.

◆ TCudaTensor() [8/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor	(	const TCudaMatrix< AFloat > &	m,
		size_t	dim = `2`
	)

◆ TCudaTensor() [9/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor	(	const TMatrixT< AFloat > &	m,
		size_t	dim = `2`
	)

inline

Definition at line 169 of file CudaTensor.h.

◆ TCudaTensor() [10/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor	(	TCudaDeviceBuffer< AFloat >	buffer,
		size_t	n,
		size_t	m
	)

inline

Definition at line 173 of file CudaTensor.h.

◆ TCudaTensor() [11/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( const TCudaTensor< AFloat > & )

default

◆ TCudaTensor() [12/12]

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( TCudaTensor< AFloat > && )

default

◆ ~TCudaTensor()

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::~TCudaTensor ( )

Member Function Documentation

◆ At()

template<typename AFloat >

TCudaTensor< AFloat > TMVA::DNN::TCudaTensor< AFloat >::At ( size_t i ) const

inline

Definition at line 364 of file CudaTensor.h.

◆ ComputeStridesFromShape()

template<typename AFloat >

static std::vector< std::size_t > TMVA::DNN::TCudaTensor< AFloat >::ComputeStridesFromShape	(	const std::vector< std::size_t > &	shape,
		bool	rowmajorLayout
	)

inline static

◆ GetComputeStream()

template<typename AFloat >

cudaStream_t TMVA::DNN::TCudaTensor< AFloat >::GetComputeStream ( ) const

inline

Definition at line 213 of file CudaTensor.h.

◆ GetCSize()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetCSize ( ) const

inline

Definition at line 279 of file CudaTensor.h.

◆ GetData() [1/2]

template<typename AFloat >

AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetData ( )

inline

Definition at line 197 of file CudaTensor.h.

◆ GetData() [2/2]

template<typename AFloat >

const AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetData ( ) const

inline

Definition at line 196 of file CudaTensor.h.

◆ GetDataPointer() [1/2]

template<typename AFloat >

AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetDataPointer ( )

inline

Definition at line 195 of file CudaTensor.h.

◆ GetDataPointer() [2/2]

template<typename AFloat >

const AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetDataPointer ( ) const

inline

Definition at line 194 of file CudaTensor.h.

◆ GetDataPointerAt() [1/2]

template<typename AFloat >

AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetDataPointerAt ( size_t i )

inline

Definition at line 201 of file CudaTensor.h.

◆ GetDataPointerAt() [2/2]

template<typename AFloat >

const AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetDataPointerAt ( size_t i ) const

inline

Definition at line 199 of file CudaTensor.h.

◆ GetDeviceBuffer() [1/2]

template<typename AFloat >

TCudaDeviceBuffer< AFloat > & TMVA::DNN::TCudaTensor< AFloat >::GetDeviceBuffer ( )

inline

Definition at line 205 of file CudaTensor.h.

◆ GetDeviceBuffer() [2/2]

template<typename AFloat >

const TCudaDeviceBuffer< AFloat > & TMVA::DNN::TCudaTensor< AFloat >::GetDeviceBuffer ( ) const

inline

Definition at line 204 of file CudaTensor.h.

◆ GetDimAt()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetDimAt ( size_t i ) const

inline

Definition at line 190 of file CudaTensor.h.

◆ GetFirstSize()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetFirstSize ( ) const

inline

Definition at line 274 of file CudaTensor.h.

◆ GetFirstStride()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetFirstStride ( ) const

inline

Definition at line 276 of file CudaTensor.h.

◆ GetHSize()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetHSize ( ) const

inline

Definition at line 283 of file CudaTensor.h.

◆ GetLayout()

template<typename AFloat >

MemoryLayout TMVA::DNN::TCudaTensor< AFloat >::GetLayout ( ) const

inline

Definition at line 186 of file CudaTensor.h.

◆ GetMatrix()

template<typename AFloat >

TCudaMatrix< AFloat > TMVA::DNN::TCudaTensor< AFloat >::GetMatrix ( ) const

inline

Definition at line 304 of file CudaTensor.h.

◆ GetNcols()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetNcols ( ) const

inline

Definition at line 300 of file CudaTensor.h.

◆ GetNDim()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetNDim ( ) const

inline

Definition at line 191 of file CudaTensor.h.

◆ GetNrows()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetNrows ( ) const

inline

Definition at line 299 of file CudaTensor.h.

◆ GetShape()

template<typename AFloat >

const Shape_t & TMVA::DNN::TCudaTensor< AFloat >::GetShape ( ) const

inline

Definition at line 188 of file CudaTensor.h.

◆ GetSize()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetSize ( ) const

inline

Definition at line 192 of file CudaTensor.h.

◆ GetStrides()

template<typename AFloat >

const Shape_t & TMVA::DNN::TCudaTensor< AFloat >::GetStrides ( ) const

inline

Definition at line 189 of file CudaTensor.h.

◆ GetWSize()

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::GetWSize ( ) const

inline

Definition at line 289 of file CudaTensor.h.

◆ InitializeCuda()

template<typename AFloat >

void TMVA::DNN::TCudaTensor< AFloat >::InitializeCuda ( )

private

Initializes all shared device resources and makes sure that a sufficient number of curand states are allocated and initialized on the device, and that the one-vector used for the summation over columns has the right size.

◆ InitializeCurandStates()

template<typename AFloat >

void TMVA::DNN::TCudaTensor< AFloat >::InitializeCurandStates ( )

private

◆ isEqual() [1/2]

template<typename AFloat >

bool TMVA::DNN::TCudaTensor< AFloat >::isEqual	(	const AFloat *	hostBufferOther,
		size_t	otherSize
	)

inline

Definition at line 238 of file CudaTensor.h.

◆ isEqual() [2/2]

template<typename AFloat >

bool TMVA::DNN::TCudaTensor< AFloat >::isEqual ( TCudaTensor< AFloat > & other )

inline

Definition at line 220 of file CudaTensor.h.

◆ operator TMatrixT()

template<typename AFloat >

TMVA::DNN::TCudaTensor< AFloat >::operator TMatrixT ( ) const

Convert cuda matrix to Root TMatrix.

Performs synchronous data transfer.

◆ operator()() [1/3]

template<typename AFloat >

TCudaDeviceReference< AFloat > TMVA::DNN::TCudaTensor< AFloat >::operator()	(	size_t	i,
		size_t	j
	)		const

inline

Definition at line 380 of file CudaTensor.h.

◆ operator()() [2/3]

template<typename AFloat >

TCudaDeviceReference< AFloat > TMVA::DNN::TCudaTensor< AFloat >::operator()	(	size_t	i,
		size_t	j,
		size_t	k
	)		const

inline

Definition at line 394 of file CudaTensor.h.

◆ operator()() [3/3]

template<typename AFloat >

TCudaDeviceReference< AFloat > TMVA::DNN::TCudaTensor< AFloat >::operator()	(	size_t	i,
		size_t	j,
		size_t	k,
		size_t	l
	)		const

inline

Definition at line 409 of file CudaTensor.h.

◆ operator=() [1/2]

template<typename AFloat >

TCudaTensor & TMVA::DNN::TCudaTensor< AFloat >::operator= ( const TCudaTensor< AFloat > & )

default

◆ operator=() [2/2]

template<typename AFloat >

TCudaTensor & TMVA::DNN::TCudaTensor< AFloat >::operator= ( TCudaTensor< AFloat > && )

default

◆ operator[]()

template<typename AFloat >

TCudaMatrix< AFloat > TMVA::DNN::TCudaTensor< AFloat >::operator[] ( size_t i ) const

inline

Definition at line 332 of file CudaTensor.h.

◆ Print()

template<typename AFloat >

void TMVA::DNN::TCudaTensor< AFloat >::Print	(	const char *	name = `"Tensor"`,
		bool	truncate = `false`
	)		const

◆ PrintShape()

template<typename AFloat >

void TMVA::DNN::TCudaTensor< AFloat >::PrintShape ( const char * name = "Tensor" ) const

◆ Reshape()

template<typename AFloat >

TCudaTensor< AFloat > TMVA::DNN::TCudaTensor< AFloat >::Reshape ( const Shape_t & newShape ) const

inline

Definition at line 354 of file CudaTensor.h.

◆ ReshapeInPlace()

template<typename AFloat >

void TMVA::DNN::TCudaTensor< AFloat >::ReshapeInPlace ( const Shape_t & newShape )

inline

Definition at line 342 of file CudaTensor.h.

◆ SetComputeStream()

template<typename AFloat >

void TMVA::DNN::TCudaTensor< AFloat >::SetComputeStream ( cudaStream_t stream )

inline

Definition at line 216 of file CudaTensor.h.

◆ SetConstVal()

template<typename AFloat >

void TMVA::DNN::TCudaTensor< AFloat >::SetConstVal ( const AFloat constVal )

inline

Definition at line 261 of file CudaTensor.h.

◆ SetTensorDescriptor()

template<typename AFloat >

void TMVA::DNN::TCudaTensor< AFloat >::SetTensorDescriptor ( )

◆ Zero()

template<typename AFloat >

void TMVA::DNN::TCudaTensor< AFloat >::Zero ( )

inline

Definition at line 257 of file CudaTensor.h.

Member Data Documentation

◆ fDevice

template<typename AFloat >

int TMVA::DNN::TCudaTensor< AFloat >::fDevice

private

Device associated with current tensor instance.

Definition at line 120 of file CudaTensor.h.

◆ fElementBuffer

template<typename AFloat >

TCudaDeviceBuffer<AFloat> TMVA::DNN::TCudaTensor< AFloat >::fElementBuffer

private

Definition at line 124 of file CudaTensor.h.

◆ fInstances

template<typename AFloat >

std::vector<int> TMVA::DNN::TCudaTensor< AFloat >::fInstances

static private

For each GPU device keep the CUDA streams in which tensors are used.

Instances belonging to the same stream on the same device share a cuDNN library handle to keep cuDNN contexts separated.

Definition at line 111 of file CudaTensor.h.

◆ fMemoryLayout

template<typename AFloat >

MemoryLayout TMVA::DNN::TCudaTensor< AFloat >::fMemoryLayout

private

Definition at line 126 of file CudaTensor.h.

◆ fNDim

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::fNDim

private

Dimension of the tensor (first dimension is the batch size, second is the no. of channels)

Definition at line 118 of file CudaTensor.h.

◆ fShape

template<typename AFloat >

Shape_t TMVA::DNN::TCudaTensor< AFloat >::fShape

private

The shape vector (size of dimensions) needs to be ordered as: no. of channels, image dimensions, spatial subdimensions.

Definition at line 116 of file CudaTensor.h.

◆ fSize

template<typename AFloat >

size_t TMVA::DNN::TCudaTensor< AFloat >::fSize

private

No. of elements.

Definition at line 119 of file CudaTensor.h.

◆ fStreamIndx

template<typename AFloat >

int TMVA::DNN::TCudaTensor< AFloat >::fStreamIndx

private

Cuda stream associated with current instance.

Definition at line 121 of file CudaTensor.h.

◆ fStrides

template<typename AFloat >

Shape_t TMVA::DNN::TCudaTensor< AFloat >::fStrides

private

Strides between tensor dimensions (always assume dense, non overlapping tensor)

Definition at line 117 of file CudaTensor.h.

◆ fTensorDescriptor

template<typename AFloat >

std::shared_ptr<TensorDescriptor> TMVA::DNN::TCudaTensor< AFloat >::fTensorDescriptor

private

Definition at line 123 of file CudaTensor.h.

tmva/tmva/inc/TMVA/DNN/Architectures/Cuda/CudaTensor.h

Classes

Public Types

Public Member Functions

Static Public Member Functions

Private Member Functions

Private Attributes

Static Private Attributes

Member Typedef Documentation

◆ MemoryLayout

◆ Scalar_t

◆ Shape_t

Constructor & Destructor Documentation

◆ TCudaTensor() [1/12]

◆ TCudaTensor() [2/12]

◆ TCudaTensor() [3/12]

◆ TCudaTensor() [4/12]

◆ TCudaTensor() [5/12]

◆ TCudaTensor() [6/12]

◆ TCudaTensor() [7/12]

◆ TCudaTensor() [8/12]

◆ TCudaTensor() [9/12]

◆ TCudaTensor() [10/12]

◆ TCudaTensor() [11/12]

◆ TCudaTensor() [12/12]

◆ ~TCudaTensor()

Member Function Documentation

◆ At()

◆ ComputeStridesFromShape()

◆ GetComputeStream()

◆ GetCSize()

◆ GetData() [1/2]

◆ GetData() [2/2]

◆ GetDataPointer() [1/2]

◆ GetDataPointer() [2/2]

◆ GetDataPointerAt() [1/2]

◆ GetDataPointerAt() [2/2]

◆ GetDeviceBuffer() [1/2]

◆ GetDeviceBuffer() [2/2]

◆ GetDimAt()

◆ GetFirstSize()

◆ GetFirstStride()

◆ GetHSize()

◆ GetLayout()

◆ GetMatrix()

◆ GetNcols()

◆ GetNDim()

◆ GetNrows()

◆ GetShape()

◆ GetSize()

◆ GetStrides()

◆ GetWSize()

◆ InitializeCuda()

◆ InitializeCurandStates()

◆ isEqual() [1/2]

◆ isEqual() [2/2]

◆ operator TMatrixT()

◆ operator()() [1/3]

◆ operator()() [2/3]

◆ operator()() [3/3]

◆ operator=() [1/2]

◆ operator=() [2/2]

◆ operator[]()

◆ Print()

◆ PrintShape()

◆ Reshape()

◆ ReshapeInPlace()

◆ SetComputeStream()

◆ SetConstVal()

◆ SetTensorDescriptor()

◆ Zero()

Member Data Documentation

◆ fDevice

◆ fElementBuffer

◆ fInstances

◆ fMemoryLayout

◆ fNDim

◆ fShape

◆ fSize

◆ fStreamIndx

◆ fStrides