19#ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDAMATRIX 
   20#define TMVA_DNN_ARCHITECTURES_CUDA_CUDAMATRIX 
   24#include "RConfigure.h" 
   27#include "cuda_runtime.h" 
   29#include "curand_kernel.h" 
   34#define CUDACHECK(ans) {cudaError((ans), __FILE__, __LINE__); } 
   57template<
typename AFloat>
 
  101template<
typename AFloat>
 
  203      if (abort) 
exit(code);
 
 
  208template<
typename AFloat>
 
  216template<
typename AFloat>
 
  220    cudaMemcpy(& buffer, fDevicePointer, 
sizeof(AFloat),
 
 
  226template<
typename AFloat>
 
  234template<
typename AFloat>
 
  237   AFloat buffer = 
value;
 
  238   cudaMemcpy(fDevicePointer, & buffer, 
sizeof(AFloat),
 
 
  243template<
typename AFloat>
 
  247   cudaMemcpy(& buffer, fDevicePointer, 
sizeof(AFloat),
 
  250   cudaMemcpy(fDevicePointer, & buffer, 
sizeof(AFloat),
 
 
  255template<
typename AFloat>
 
  259   cudaMemcpy(& buffer, fDevicePointer, 
sizeof(AFloat),
 
  262   cudaMemcpy(fDevicePointer, & buffer, 
sizeof(AFloat),
 
 
  267template<
typename AFloat>
 
  270   return fElementBuffer.GetComputeStream();
 
 
  274template<
typename AFloat>
 
  277   return fElementBuffer.SetComputeStream(stream);
 
 
  281template<
typename AFloat>
 
  292template<
typename AFloat>
 
  295   AFloat buffer = 
value;
 
 
  300template<
typename AFloat>
 
  309template<
typename AFloat>
 
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
 
void operator-=(AFloat value)
 
TCudaDeviceReference(AFloat *devicePointer)
 
void operator=(const TCudaDeviceReference &other)
 
void operator+=(AFloat value)
 
TCudaDeviceBuffer< AFloat > fElementBuffer
 
TCudaMatrix & operator=(const TCudaMatrix &)=default
 
static AFloat GetDeviceReturn()
Transfer the value in the device return buffer to the host.
 
void SetComputeStream(cudaStream_t stream)
 
cudaStream_t GetComputeStream() const
 
size_t GetNoElements() const
 
void InitializeCuda()
Initializes all shared device resources and makes sure that a sufficient number of curand states are ...
 
static Bool_t gInitializeCurand
 
TCudaDeviceReference< AFloat > operator()(size_t i, size_t j) const
Access to elements of device matrices provided through TCudaDeviceReference class.
 
static AFloat * GetDeviceReturnPointer()
Return device pointer to the device return buffer.
 
static curandState_t * fCurandStates
 
const cublasHandle_t & GetCublasHandle() const
 
static void ResetDeviceReturn(AFloat value=0.0)
Set the return buffer on the device to the specified value.
 
const AFloat * GetDataPointer() const
 
static size_t fNCurandStates
 
TCudaMatrix(const TCudaMatrix &)=default
 
TCudaDeviceBuffer< AFloat > GetDeviceBuffer() const
 
static AFloat * fDeviceReturn
Buffer for kernel return values.
 
void Synchronize(const TCudaMatrix &) const
Blocking synchronization with the associated compute stream, if it's not the default stream.
 
static AFloat * GetOnes()
 
static AFloat * fOnes
Vector used for summations of columns.
 
static cublasHandle_t fCublasHandle
 
static size_t fInstances
Current number of matrix instances.
 
TCudaMatrix & operator=(TCudaMatrix &&)=default
 
void InitializeCurandStates()
 
AFloat * GetDataPointer()
 
static size_t fNOnes
Current length of the one vector.
 
TCudaMatrix(TCudaMatrix &&)=default
 
static curandState_t * GetCurandStatesPointer()
 
void cudaError(cudaError_t code, const char *file, int line, bool abort=true)
Function to check the CUDA return code.
 
create variable transformations