TCudaTensor Class.
The TCudaTensor class extends the TCudaMatrix class for dimensions > 2.
Definition at line 83 of file CudaTensor.h.
Classes | |
struct | TensorDescriptor |
Public Types | |
using | MemoryLayout = TMVA::Experimental::MemoryLayout |
using | Scalar_t = AFloat |
using | Shape_t = std::vector< size_t > |
Public Member Functions | |
TCudaTensor () | |
TCudaTensor (const AFloat *data, const std::vector< size_t > &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0) | |
TCudaTensor (const std::vector< size_t > &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0) | |
TCudaTensor (const TCudaMatrix< AFloat > &m, size_t dim=2) | |
TCudaTensor (const TCudaTensor &)=default | |
TCudaTensor (size_t bsize, size_t csize, size_t hsize, size_t wsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0) | |
TCudaTensor (size_t bsize, size_t csize, size_t hwsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0) | |
TCudaTensor (size_t n, size_t m, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0) | |
TCudaTensor (TCudaDeviceBuffer< AFloat > buffer, const std::vector< size_t > &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0) | |
TCudaTensor (TCudaDeviceBuffer< AFloat > buffer, size_t n, size_t m) | |
TCudaTensor (TCudaTensor &&)=default | |
~TCudaTensor () | |
TCudaTensor< AFloat > | At (size_t i) const |
cudaStream_t | GetComputeStream () const |
size_t | GetCSize () const |
AFloat * | GetData () |
const AFloat * | GetData () const |
AFloat * | GetDataPointer () |
const AFloat * | GetDataPointer () const |
AFloat * | GetDataPointerAt (size_t i) |
const AFloat * | GetDataPointerAt (size_t i) const |
TCudaDeviceBuffer< AFloat > & | GetDeviceBuffer () |
const TCudaDeviceBuffer< AFloat > & | GetDeviceBuffer () const |
size_t | GetDimAt (size_t i) const |
size_t | GetFirstSize () const |
size_t | GetFirstStride () const |
size_t | GetHSize () const |
MemoryLayout | GetLayout () const |
TCudaMatrix< AFloat > | GetMatrix () const |
size_t | GetNcols () const |
size_t | GetNDim () const |
size_t | GetNrows () const |
const Shape_t & | GetShape () const |
size_t | GetSize () const |
const Shape_t & | GetStrides () const |
size_t | GetWSize () const |
bool | isEqual (const AFloat *hostBufferOther, size_t otherSize) |
bool | isEqual (TCudaTensor< AFloat > &other) |
operator TMatrixT< AFloat > () const | |
Convert CUDA tensor to ROOT TMatrixT. More... | |
TCudaDeviceReference< AFloat > | operator() (size_t i, size_t j) const |
TCudaDeviceReference< AFloat > | operator() (size_t i, size_t j, size_t k) const |
TCudaDeviceReference< AFloat > | operator() (size_t i, size_t j, size_t k, size_t l) const |
TCudaTensor & | operator= (const TCudaTensor &)=default |
TCudaTensor & | operator= (TCudaTensor &&)=default |
void | Print (const char *name="Tensor", bool truncate=false) const |
void | PrintShape (const char *name="Tensor") const |
TCudaTensor< AFloat > | Reshape (const Shape_t &newShape) const |
void | ReshapeInPlace (const Shape_t &newShape) |
void | SetComputeStream (cudaStream_t stream) |
void | SetConstVal (const AFloat constVal) |
void | SetTensorDescriptor () |
void | Zero () |
Static Public Member Functions | |
static std::vector< std::size_t > | ComputeStridesFromShape (const std::vector< std::size_t > &shape, bool rowmajorLayout) |
Private Member Functions | |
void | InitializeCuda () |
Initializes all shared device resources and makes sure that a sufficient number of curand states are allocated and initialized on the device, and that the one-vector for the summation over columns has the right size. More... | |
void | InitializeCurandStates () |
Private Attributes | |
int | fDevice |
Device associated with current tensor instance. More... | |
TCudaDeviceBuffer< AFloat > | fElementBuffer |
MemoryLayout | fMemoryLayout |
size_t | fNDim |
Dimension of the tensor (first dimension is the batch size, second is the number of channels) More... | |
Shape_t | fShape |
The shape vector (size of dimensions) needs to be ordered as number of channels, image dimensions, spatial subdimensions. More... | |
size_t | fSize |
No. of elements. More... | |
int | fStreamIndx |
Cuda stream associated with current instance. More... | |
Shape_t | fStrides |
Strides between tensor dimensions (always assume dense, non overlapping tensor) More... | |
std::shared_ptr< TensorDescriptor > | fTensorDescriptor |
Static Private Attributes | |
static std::vector< int > | fInstances |
For each GPU device keep the CUDA streams in which tensors are used. More... | |
#include <TMVA/DNN/Architectures/Cuda/CudaTensor.h>
using TMVA::DNN::TCudaTensor< AFloat >::MemoryLayout = TMVA::Experimental:: MemoryLayout |
Definition at line 88 of file CudaTensor.h.
using TMVA::DNN::TCudaTensor< AFloat >::Scalar_t = AFloat |
Definition at line 89 of file CudaTensor.h.
using TMVA::DNN::TCudaTensor< AFloat >::Shape_t = std::vector<size_t> |
Definition at line 87 of file CudaTensor.h.
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor | ( | ) |
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor | ( | const AFloat * | data, |
const std::vector< size_t > & | shape, | ||
MemoryLayout | memlayout = MemoryLayout::ColumnMajor , |
||
int | deviceIndx = 0 , |
||
int | streamIndx = 0 |
||
) |
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor | ( | TCudaDeviceBuffer< AFloat > | buffer, |
const std::vector< size_t > & | shape, | ||
MemoryLayout | memlayout = MemoryLayout::ColumnMajor , |
||
int | deviceIndx = 0 , |
||
int | streamIndx = 0 |
||
) |
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor | ( | const std::vector< size_t > & | shape, |
MemoryLayout | memlayout = MemoryLayout::ColumnMajor , |
||
int | deviceIndx = 0 , |
||
int | streamIndx = 0 |
||
) |
|
inline |
Definition at line 149 of file CudaTensor.h.
|
inline |
Definition at line 154 of file CudaTensor.h.
|
inline |
Definition at line 162 of file CudaTensor.h.
TMVA::DNN::TCudaTensor< AFloat >::TCudaTensor | ( | const TCudaMatrix< AFloat > & | m, |
size_t | dim = 2 |
||
) |
|
inline |
Definition at line 169 of file CudaTensor.h.
|
default |
|
default |
TMVA::DNN::TCudaTensor< AFloat >::~TCudaTensor | ( | ) |
|
inline |
Definition at line 360 of file CudaTensor.h.
|
inlinestatic |
|
inline |
Definition at line 209 of file CudaTensor.h.
|
inline |
Definition at line 275 of file CudaTensor.h.
|
inline |
Definition at line 193 of file CudaTensor.h.
|
inline |
Definition at line 192 of file CudaTensor.h.
|
inline |
Definition at line 191 of file CudaTensor.h.
|
inline |
Definition at line 190 of file CudaTensor.h.
|
inline |
Definition at line 197 of file CudaTensor.h.
|
inline |
Definition at line 195 of file CudaTensor.h.
|
inline |
Definition at line 201 of file CudaTensor.h.
|
inline |
Definition at line 200 of file CudaTensor.h.
|
inline |
Definition at line 186 of file CudaTensor.h.
|
inline |
Definition at line 270 of file CudaTensor.h.
|
inline |
Definition at line 272 of file CudaTensor.h.
|
inline |
Definition at line 279 of file CudaTensor.h.
|
inline |
Definition at line 182 of file CudaTensor.h.
|
inline |
Definition at line 300 of file CudaTensor.h.
|
inline |
Definition at line 296 of file CudaTensor.h.
|
inline |
Definition at line 187 of file CudaTensor.h.
|
inline |
Definition at line 295 of file CudaTensor.h.
|
inline |
Definition at line 184 of file CudaTensor.h.
|
inline |
Definition at line 188 of file CudaTensor.h.
|
inline |
Definition at line 185 of file CudaTensor.h.
|
inline |
Definition at line 285 of file CudaTensor.h.
|
private |
Initializes all shared device resources and makes sure that a sufficient number of curand states are allocated and initialized on the device, and that the one-vector for the summation over columns has the right size.
|
private |
|
inline |
Definition at line 234 of file CudaTensor.h.
|
inline |
Definition at line 216 of file CudaTensor.h.
TMVA::DNN::TCudaTensor< AFloat >::operator TMatrixT< AFloat > | ( | ) | const |
Convert CUDA tensor to ROOT TMatrixT.
Performs synchronous data transfer.
|
inline |
Definition at line 376 of file CudaTensor.h.
|
inline |
Definition at line 390 of file CudaTensor.h.
|
inline |
Definition at line 405 of file CudaTensor.h.
|
default |
|
default |
void TMVA::DNN::TCudaTensor< AFloat >::Print | ( | const char * | name = "Tensor" , |
bool | truncate = false |
||
) | const |
void TMVA::DNN::TCudaTensor< AFloat >::PrintShape | ( | const char * | name = "Tensor" | ) | const |
|
inline |
Definition at line 343 of file CudaTensor.h.
|
inline |
Definition at line 331 of file CudaTensor.h.
|
inline |
Definition at line 212 of file CudaTensor.h.
|
inline |
Definition at line 257 of file CudaTensor.h.
void TMVA::DNN::TCudaTensor< AFloat >::SetTensorDescriptor | ( | ) |
|
inline |
Definition at line 253 of file CudaTensor.h.
|
private |
Device associated with current tensor instance.
Definition at line 120 of file CudaTensor.h.
|
private |
Definition at line 124 of file CudaTensor.h.
|
staticprivate |
For each GPU device keep the CUDA streams in which tensors are used.
Instances belonging to the same stream on the same device share a cudnn library handle to keep cudnn contexts separated.
Definition at line 111 of file CudaTensor.h.
|
private |
Definition at line 126 of file CudaTensor.h.
|
private |
Dimension of the tensor (first dimension is the batch size, second is the number of channels)
Definition at line 118 of file CudaTensor.h.
|
private |
The shape vector (size of dimensions) needs to be ordered as number of channels, image dimensions, spatial subdimensions.
Definition at line 116 of file CudaTensor.h.
|
private |
No. of elements.
Definition at line 119 of file CudaTensor.h.
|
private |
Cuda stream associated with current instance.
Definition at line 121 of file CudaTensor.h.
|
private |
Strides between tensor dimensions (always assume dense, non overlapping tensor)
Definition at line 117 of file CudaTensor.h.
|
private |
Definition at line 123 of file CudaTensor.h.