Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVA::DNN::TCudaTensor< AFloat > Class Template Reference

template<typename AFloat>
class TMVA::DNN::TCudaTensor< AFloat >

TCudaTensor Class.

The TCudaTensor class extends the TCudaMatrix class for dimensions > 2.

Definition at line 83 of file CudaTensor.h.

Classes

struct  TensorDescriptor
 

Public Types

using MemoryLayout = TMVA::Experimental::MemoryLayout
 
using Scalar_t = AFloat
 
using Shape_t = std::vector< size_t >
 

Public Member Functions

 TCudaTensor ()
 
 TCudaTensor (const AFloat *data, const std::vector< size_t > &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
 
 TCudaTensor (const std::vector< size_t > &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
 
 TCudaTensor (const TCudaMatrix< AFloat > &m, size_t dim=2)
 
 TCudaTensor (const TCudaTensor &)=default
 
 TCudaTensor (const TMatrixT< AFloat > &m, size_t dim=2)
 
 TCudaTensor (size_t bsize, size_t csize, size_t hsize, size_t wsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
 
 TCudaTensor (size_t bsize, size_t csize, size_t hwsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
 
 TCudaTensor (size_t n, size_t m, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
 
 TCudaTensor (TCudaDeviceBuffer< AFloat > buffer, const std::vector< size_t > &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
 
 TCudaTensor (TCudaDeviceBuffer< AFloat > buffer, size_t n, size_t m)
 
 TCudaTensor (TCudaTensor &&)=default
 
 ~TCudaTensor ()
 
TCudaTensor< AFloat > At (size_t i) const
 
cudaStream_t GetComputeStream () const
 
size_t GetCSize () const
 
AFloat * GetData ()
 
const AFloat * GetData () const
 
AFloat * GetDataPointer ()
 
const AFloat * GetDataPointer () const
 
AFloat * GetDataPointerAt (size_t i)
 
const AFloat * GetDataPointerAt (size_t i) const
 
TCudaDeviceBuffer< AFloat > & GetDeviceBuffer ()
 
const TCudaDeviceBuffer< AFloat > & GetDeviceBuffer () const
 
size_t GetDimAt (size_t i) const
 
size_t GetFirstSize () const
 
size_t GetFirstStride () const
 
size_t GetHSize () const
 
MemoryLayout GetLayout () const
 
TCudaMatrix< AFloat > GetMatrix () const
 
size_t GetNcols () const
 
size_t GetNDim () const
 
size_t GetNrows () const
 
const Shape_t & GetShape () const
 
size_t GetSize () const
 
const Shape_t & GetStrides () const
 
size_t GetWSize () const
 
bool isEqual (const AFloat *hostBufferOther, size_t otherSize)
 
bool isEqual (TCudaTensor< AFloat > &other)
 
 operator TMatrixT () const
 Convert cuda matrix to Root TMatrix.
 
TCudaDeviceReference< AFloat > operator() (size_t i, size_t j) const
 
TCudaDeviceReference< AFloat > operator() (size_t i, size_t j, size_t k) const
 
TCudaDeviceReference< AFloat > operator() (size_t i, size_t j, size_t k, size_t l) const
 
TCudaTensor & operator= (const TCudaTensor &)=default
 
TCudaTensor & operator= (TCudaTensor &&)=default
 
TCudaMatrix< AFloat > operator[] (size_t i) const
 
void Print (const char *name="Tensor", bool truncate=false) const
 
void PrintShape (const char *name="Tensor") const
 
TCudaTensor< AFloat > Reshape (const Shape_t &newShape) const
 
void ReshapeInPlace (const Shape_t &newShape)
 
void SetComputeStream (cudaStream_t stream)
 
void SetConstVal (const AFloat constVal)
 
void SetTensorDescriptor ()
 
void Zero ()
 

Static Public Member Functions

static std::vector< std::size_t > ComputeStridesFromShape (const std::vector< std::size_t > &shape, bool rowmajorLayout)
 

Private Member Functions

void InitializeCuda ()
 Initializes all shared device resources and makes sure that a sufficient number of curand states are allocated and initialized on the device, and that the one-vector used for the summation over columns has the right size.
 
void InitializeCurandStates ()
 

Private Attributes

int fDevice
 Device associated with current tensor instance.
 
TCudaDeviceBuffer< AFloat > fElementBuffer
 
MemoryLayout fMemoryLayout
 
size_t fNDim
 Dimension of the tensor (first dimension is the batch size, second is the no. channels)
 
Shape_t fShape
 The shape vector (size of dimensions) needs to be ordered as: number of channels, image dimensions, spatial subdimensions.
 
size_t fSize
 No. of elements.
 
int fStreamIndx
 Cuda stream associated with current instance.
 
Shape_t fStrides
 Strides between tensor dimensions (always assume dense, non overlapping tensor)
 
std::shared_ptr< TensorDescriptor > fTensorDescriptor
 

Static Private Attributes

static std::vector< int > fInstances
 For each GPU device keep the CUDA streams in which tensors are used.
 

#include <TMVA/DNN/Architectures/Cuda/CudaTensor.h>

Member Typedef Documentation

◆ MemoryLayout

template<typename AFloat >
using TMVA::DNN::TCudaTensor< AFloat >::MemoryLayout = TMVA::Experimental::MemoryLayout

Definition at line 88 of file CudaTensor.h.

◆ Scalar_t

template<typename AFloat >
using TMVA::DNN::TCudaTensor< AFloat >::Scalar_t = AFloat

Definition at line 89 of file CudaTensor.h.

◆ Shape_t

template<typename AFloat >
using TMVA::DNN::TCudaTensor< AFloat >::Shape_t = std::vector<size_t>

Definition at line 87 of file CudaTensor.h.

Constructor & Destructor Documentation

◆ TCudaTensor() [1/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( )

◆ TCudaTensor() [2/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( const AFloat *  data,
const std::vector< size_t > &  shape,
MemoryLayout  memlayout = MemoryLayout::ColumnMajor,
int  deviceIndx = 0,
int  streamIndx = 0 
)

◆ TCudaTensor() [3/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( TCudaDeviceBuffer< AFloat >  buffer,
const std::vector< size_t > &  shape,
MemoryLayout  memlayout = MemoryLayout::ColumnMajor,
int  deviceIndx = 0,
int  streamIndx = 0 
)

◆ TCudaTensor() [4/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( const std::vector< size_t > &  shape,
MemoryLayout  memlayout = MemoryLayout::ColumnMajor,
int  deviceIndx = 0,
int  streamIndx = 0 
)

◆ TCudaTensor() [5/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( size_t  bsize,
size_t  csize,
size_t  hwsize,
MemoryLayout  memlayout = MemoryLayout::ColumnMajor,
int  deviceIndx = 0,
int  streamIndx = 0 
)
inline

Definition at line 149 of file CudaTensor.h.

◆ TCudaTensor() [6/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( size_t  bsize,
size_t  csize,
size_t  hsize,
size_t  wsize,
MemoryLayout  memlayout = MemoryLayout::ColumnMajor,
int  deviceIndx = 0,
int  streamIndx = 0 
)
inline

Definition at line 154 of file CudaTensor.h.

◆ TCudaTensor() [7/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( size_t  n,
size_t  m,
MemoryLayout  memlayout = MemoryLayout::ColumnMajor,
int  deviceIndx = 0,
int  streamIndx = 0 
)
inline

Definition at line 162 of file CudaTensor.h.

◆ TCudaTensor() [8/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( const TCudaMatrix< AFloat > &  m,
size_t  dim = 2 
)

◆ TCudaTensor() [9/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( const TMatrixT< AFloat > &  m,
size_t  dim = 2 
)
inline

Definition at line 169 of file CudaTensor.h.

◆ TCudaTensor() [10/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( TCudaDeviceBuffer< AFloat >  buffer,
size_t  n,
size_t  m 
)
inline

Definition at line 173 of file CudaTensor.h.

◆ TCudaTensor() [11/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( const TCudaTensor< AFloat > &  )
default

◆ TCudaTensor() [12/12]

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor ( TCudaTensor< AFloat > &&  )
default

◆ ~TCudaTensor()

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::~TCudaTensor ( )

Member Function Documentation

◆ At()

template<typename AFloat >
TCudaTensor< AFloat > TMVA::DNN::TCudaTensor< AFloat >::At ( size_t  i) const
inline

Definition at line 364 of file CudaTensor.h.

◆ ComputeStridesFromShape()

template<typename AFloat >
static std::vector< std::size_t > TMVA::DNN::TCudaTensor< AFloat >::ComputeStridesFromShape ( const std::vector< std::size_t > &  shape,
bool  rowmajorLayout 
)
inline static

◆ GetComputeStream()

template<typename AFloat >
cudaStream_t TMVA::DNN::TCudaTensor< AFloat >::GetComputeStream ( ) const
inline

Definition at line 213 of file CudaTensor.h.

◆ GetCSize()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetCSize ( ) const
inline

Definition at line 279 of file CudaTensor.h.

◆ GetData() [1/2]

template<typename AFloat >
AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetData ( )
inline

Definition at line 197 of file CudaTensor.h.

◆ GetData() [2/2]

template<typename AFloat >
const AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetData ( ) const
inline

Definition at line 196 of file CudaTensor.h.

◆ GetDataPointer() [1/2]

template<typename AFloat >
AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetDataPointer ( )
inline

Definition at line 195 of file CudaTensor.h.

◆ GetDataPointer() [2/2]

template<typename AFloat >
const AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetDataPointer ( ) const
inline

Definition at line 194 of file CudaTensor.h.

◆ GetDataPointerAt() [1/2]

template<typename AFloat >
AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetDataPointerAt ( size_t  i)
inline

Definition at line 201 of file CudaTensor.h.

◆ GetDataPointerAt() [2/2]

template<typename AFloat >
const AFloat * TMVA::DNN::TCudaTensor< AFloat >::GetDataPointerAt ( size_t  i) const
inline

Definition at line 199 of file CudaTensor.h.

◆ GetDeviceBuffer() [1/2]

template<typename AFloat >
TCudaDeviceBuffer< AFloat > & TMVA::DNN::TCudaTensor< AFloat >::GetDeviceBuffer ( )
inline

Definition at line 205 of file CudaTensor.h.

◆ GetDeviceBuffer() [2/2]

template<typename AFloat >
const TCudaDeviceBuffer< AFloat > & TMVA::DNN::TCudaTensor< AFloat >::GetDeviceBuffer ( ) const
inline

Definition at line 204 of file CudaTensor.h.

◆ GetDimAt()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetDimAt ( size_t  i) const
inline

Definition at line 190 of file CudaTensor.h.

◆ GetFirstSize()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetFirstSize ( ) const
inline

Definition at line 274 of file CudaTensor.h.

◆ GetFirstStride()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetFirstStride ( ) const
inline

Definition at line 276 of file CudaTensor.h.

◆ GetHSize()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetHSize ( ) const
inline

Definition at line 283 of file CudaTensor.h.

◆ GetLayout()

template<typename AFloat >
MemoryLayout TMVA::DNN::TCudaTensor< AFloat >::GetLayout ( ) const
inline

Definition at line 186 of file CudaTensor.h.

◆ GetMatrix()

template<typename AFloat >
TCudaMatrix< AFloat > TMVA::DNN::TCudaTensor< AFloat >::GetMatrix ( ) const
inline

Definition at line 304 of file CudaTensor.h.

◆ GetNcols()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetNcols ( ) const
inline

Definition at line 300 of file CudaTensor.h.

◆ GetNDim()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetNDim ( ) const
inline

Definition at line 191 of file CudaTensor.h.

◆ GetNrows()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetNrows ( ) const
inline

Definition at line 299 of file CudaTensor.h.

◆ GetShape()

template<typename AFloat >
const Shape_t & TMVA::DNN::TCudaTensor< AFloat >::GetShape ( ) const
inline

Definition at line 188 of file CudaTensor.h.

◆ GetSize()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetSize ( ) const
inline

Definition at line 192 of file CudaTensor.h.

◆ GetStrides()

template<typename AFloat >
const Shape_t & TMVA::DNN::TCudaTensor< AFloat >::GetStrides ( ) const
inline

Definition at line 189 of file CudaTensor.h.

◆ GetWSize()

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::GetWSize ( ) const
inline

Definition at line 289 of file CudaTensor.h.

◆ InitializeCuda()

template<typename AFloat >
void TMVA::DNN::TCudaTensor< AFloat >::InitializeCuda ( )
private

Initializes all shared device resources and makes sure that a sufficient number of curand states are allocated and initialized on the device, and that the one-vector used for the summation over columns has the right size.

◆ InitializeCurandStates()

template<typename AFloat >
void TMVA::DNN::TCudaTensor< AFloat >::InitializeCurandStates ( )
private

◆ isEqual() [1/2]

template<typename AFloat >
bool TMVA::DNN::TCudaTensor< AFloat >::isEqual ( const AFloat *  hostBufferOther,
size_t  otherSize 
)
inline

Definition at line 238 of file CudaTensor.h.

◆ isEqual() [2/2]

template<typename AFloat >
bool TMVA::DNN::TCudaTensor< AFloat >::isEqual ( TCudaTensor< AFloat > &  other)
inline

Definition at line 220 of file CudaTensor.h.

◆ operator TMatrixT()

template<typename AFloat >
TMVA::DNN::TCudaTensor< AFloat >::operator TMatrixT ( ) const

Convert cuda matrix to Root TMatrix.

Performs synchronous data transfer.

◆ operator()() [1/3]

template<typename AFloat >
TCudaDeviceReference< AFloat > TMVA::DNN::TCudaTensor< AFloat >::operator() ( size_t  i,
size_t  j 
) const
inline

Definition at line 380 of file CudaTensor.h.

◆ operator()() [2/3]

template<typename AFloat >
TCudaDeviceReference< AFloat > TMVA::DNN::TCudaTensor< AFloat >::operator() ( size_t  i,
size_t  j,
size_t  k 
) const
inline

Definition at line 394 of file CudaTensor.h.

◆ operator()() [3/3]

template<typename AFloat >
TCudaDeviceReference< AFloat > TMVA::DNN::TCudaTensor< AFloat >::operator() ( size_t  i,
size_t  j,
size_t  k,
size_t  l 
) const
inline

Definition at line 409 of file CudaTensor.h.

◆ operator=() [1/2]

template<typename AFloat >
TCudaTensor & TMVA::DNN::TCudaTensor< AFloat >::operator= ( const TCudaTensor< AFloat > &  )
default

◆ operator=() [2/2]

template<typename AFloat >
TCudaTensor & TMVA::DNN::TCudaTensor< AFloat >::operator= ( TCudaTensor< AFloat > &&  )
default

◆ operator[]()

template<typename AFloat >
TCudaMatrix< AFloat > TMVA::DNN::TCudaTensor< AFloat >::operator[] ( size_t  i) const
inline

Definition at line 332 of file CudaTensor.h.

◆ Print()

template<typename AFloat >
void TMVA::DNN::TCudaTensor< AFloat >::Print ( const char *  name = "Tensor",
bool  truncate = false 
) const

◆ PrintShape()

template<typename AFloat >
void TMVA::DNN::TCudaTensor< AFloat >::PrintShape ( const char *  name = "Tensor") const

◆ Reshape()

template<typename AFloat >
TCudaTensor< AFloat > TMVA::DNN::TCudaTensor< AFloat >::Reshape ( const Shape_t & newShape) const
inline

Definition at line 354 of file CudaTensor.h.

◆ ReshapeInPlace()

template<typename AFloat >
void TMVA::DNN::TCudaTensor< AFloat >::ReshapeInPlace ( const Shape_t & newShape)
inline

Definition at line 342 of file CudaTensor.h.

◆ SetComputeStream()

template<typename AFloat >
void TMVA::DNN::TCudaTensor< AFloat >::SetComputeStream ( cudaStream_t  stream)
inline

Definition at line 216 of file CudaTensor.h.

◆ SetConstVal()

template<typename AFloat >
void TMVA::DNN::TCudaTensor< AFloat >::SetConstVal ( const AFloat  constVal)
inline

Definition at line 261 of file CudaTensor.h.

◆ SetTensorDescriptor()

template<typename AFloat >
void TMVA::DNN::TCudaTensor< AFloat >::SetTensorDescriptor ( )

◆ Zero()

template<typename AFloat >
void TMVA::DNN::TCudaTensor< AFloat >::Zero ( )
inline

Definition at line 257 of file CudaTensor.h.

Member Data Documentation

◆ fDevice

template<typename AFloat >
int TMVA::DNN::TCudaTensor< AFloat >::fDevice
private

Device associated with current tensor instance.

Definition at line 120 of file CudaTensor.h.

◆ fElementBuffer

template<typename AFloat >
TCudaDeviceBuffer<AFloat> TMVA::DNN::TCudaTensor< AFloat >::fElementBuffer
private

Definition at line 124 of file CudaTensor.h.

◆ fInstances

template<typename AFloat >
std::vector<int> TMVA::DNN::TCudaTensor< AFloat >::fInstances
static private

For each GPU device keep the CUDA streams in which tensors are used.

Instances belonging to the same stream on the same device share a cudnn library handle to keep cudnn contexts separated.

Definition at line 111 of file CudaTensor.h.

◆ fMemoryLayout

template<typename AFloat >
MemoryLayout TMVA::DNN::TCudaTensor< AFloat >::fMemoryLayout
private

Definition at line 126 of file CudaTensor.h.

◆ fNDim

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::fNDim
private

Dimension of the tensor (first dimension is the batch size, second is the no. channels)

Definition at line 118 of file CudaTensor.h.

◆ fShape

template<typename AFloat >
Shape_t TMVA::DNN::TCudaTensor< AFloat >::fShape
private

The shape vector (size of dimensions) needs to be ordered as: number of channels, image dimensions, spatial subdimensions.

Definition at line 116 of file CudaTensor.h.

◆ fSize

template<typename AFloat >
size_t TMVA::DNN::TCudaTensor< AFloat >::fSize
private

No. of elements.

Definition at line 119 of file CudaTensor.h.

◆ fStreamIndx

template<typename AFloat >
int TMVA::DNN::TCudaTensor< AFloat >::fStreamIndx
private

Cuda stream associated with current instance.

Definition at line 121 of file CudaTensor.h.

◆ fStrides

template<typename AFloat >
Shape_t TMVA::DNN::TCudaTensor< AFloat >::fStrides
private

Strides between tensor dimensions (always assume dense, non overlapping tensor)

Definition at line 117 of file CudaTensor.h.

◆ fTensorDescriptor

template<typename AFloat >
std::shared_ptr<TensorDescriptor> TMVA::DNN::TCudaTensor< AFloat >::fTensorDescriptor
private

Definition at line 123 of file CudaTensor.h.

  • tmva/tmva/inc/TMVA/DNN/Architectures/Cuda/CudaTensor.h