19#ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDATENSOR
20#define TMVA_DNN_ARCHITECTURES_CUDA_CUDATENSOR
36#define CUDNNCHECK(ans) {cudnnError((ans), __FILE__, __LINE__); }
45namespace Experimental {
82template<
typename AFloat>
138 const std::vector<size_t> & shape,
142 const std::vector<size_t> & shape,
158 if (
memlayout == MemoryLayout::ColumnMajor)
174 TCudaTensor( buffer, {
n,
m}, MemoryLayout::ColumnMajor ,0,0) {}
232 for (
size_t i = 0; i <
fSize; i++) {
246 for (
size_t i = 0; i <
fSize; i++) {
280 if (
fNDim == 2)
return 1;
306 if (
GetLayout() == MemoryLayout::ColumnMajor &&
315 return (
GetLayout() == MemoryLayout::ColumnMajor ) ?
322 return (
GetLayout() == MemoryLayout::ColumnMajor ) ?
334 return At(i).GetMatrix();
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
TCudaTensor< AFloat > At(size_t i) const
const AFloat * GetDataPointerAt(size_t i) const
void SetTensorDescriptor()
const Shape_t & GetShape() const
AFloat * GetDataPointer()
std::vector< size_t > Shape_t
TCudaTensor(const TMatrixT< AFloat > &m, size_t dim=2)
const AFloat * GetData() const
size_t GetDimAt(size_t i) const
Shape_t fStrides
Strides between tensor dimensions (always assume dense, non overlapping tensor)
int fDevice
Device associated with current tensor instance.
bool isEqual(TCudaTensor< AFloat > &other)
TCudaTensor & operator=(TCudaTensor &&)=default
TCudaDeviceReference< AFloat > operator()(size_t i, size_t j) const
TCudaMatrix< AFloat > operator[](size_t i) const
TCudaTensor(size_t bsize, size_t csize, size_t hsize, size_t wsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
size_t fNDim
Dimension of the tensor (first dimension is the batch size, second is the no. channels)
TCudaTensor & operator=(const TCudaTensor &)=default
cudaStream_t GetComputeStream() const
void InitializeCuda()
Initializes all shared device resources and makes sure that a sufficient number of curand states are ...
MemoryLayout GetLayout() const
static std::vector< int > fInstances
For each GPU device keep the CUDA streams in which tensors are used.
TCudaDeviceBuffer< AFloat > & GetDeviceBuffer()
TCudaTensor(TCudaTensor &&)=default
TCudaTensor(size_t bsize, size_t csize, size_t hwsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
TCudaTensor(const TCudaTensor &)=default
void InitializeCurandStates()
Shape_t fShape
The shape vector (size of dimensions) needs to be ordered as no.
bool isEqual(const AFloat *hostBufferOther, size_t otherSize)
AFloat * GetDataPointerAt(size_t i)
TCudaTensor(TCudaDeviceBuffer< AFloat > buffer, size_t n, size_t m)
void PrintShape(const char *name="Tensor") const
TCudaTensor< AFloat > Reshape(const Shape_t &newShape) const
size_t fSize
No. of elements.
TCudaMatrix< AFloat > GetMatrix() const
TCudaTensor(size_t n, size_t m, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
const AFloat * GetDataPointer() const
const Shape_t & GetStrides() const
static std::vector< std::size_t > ComputeStridesFromShape(const std::vector< std::size_t > &shape, bool rowmajorLayout)
This information is needed for the multi-dimensional indexing.
void ReshapeInPlace(const Shape_t &newShape)
TCudaDeviceBuffer< AFloat > fElementBuffer
size_t GetFirstStride() const
const TCudaDeviceBuffer< AFloat > & GetDeviceBuffer() const
MemoryLayout fMemoryLayout
TCudaDeviceReference< AFloat > operator()(size_t i, size_t j, size_t k, size_t l) const
void SetComputeStream(cudaStream_t stream)
TCudaDeviceReference< AFloat > operator()(size_t i, size_t j, size_t k) const
size_t GetFirstSize() const
void SetConstVal(const AFloat constVal)
int fStreamIndx
CUDA stream associated with the current instance.
void Print(const char *name="Tensor", bool truncate=false) const
std::shared_ptr< TensorDescriptor > fTensorDescriptor
MemoryLayout
Memory layout type (copied from RTensor.hxx)
create variable transformations