29 int i = blockDim.y * blockIdx.y + threadIdx.y;
30 int j = blockDim.x * blockIdx.x + threadIdx.x;
31 int tid = i * gridDim.x + j;
32 curand_init(seed + tid, 0, tid, state + tid);
37template<
typename AFloat>
39template<
typename AFloat>
41template<
typename AFloat>
43template<
typename AFloat>
45template<
typename AFloat>
47template<
typename AFloat>
49template<
typename AFloat>
51template <
typename AFloat>
56template<
typename AFloat>
64template<
typename AFloat>
72template<
typename AFloat>
81 for (
size_t j = 0; j <
fNCols; j++) {
82 for (
size_t i = 0; i <
fNRows; i++) {
83 buffer[index] =
static_cast<AFloat
>(Host(i, j));
89 cudaMemcpyHostToDevice);
93template<
typename AFloat>
102template <
typename AFloat>
113 std::cout <<
"***** Warning - initialize a BIG curandstate for matrix " <<
fNRows <<
"," <<
fNCols <<
" nstate "
128 AFloat * buffer =
new AFloat[
fNRows];
129 for (
size_t i = 0; i <
fNRows; i++) {
133 cudaMemcpyHostToDevice);
139template<
typename AFloat>
149template<
typename AFloat>
156 cudaMemcpyDeviceToHost);
159 for (
size_t j = 0; j <
fNCols; j++) {
160 for (
size_t i = 0; i <
fNRows; i++) {
161 hostMatrix(i, j) =
static_cast<Double_t>(buffer[index]);
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
TCudaDeviceBuffer< AFloat > fElementBuffer
static curandState_t * fCurandStates
static AFloat * fDeviceReturn
Buffer for kernel return values.
size_t GetNoElements() const
void InitializeCuda()
Initializes all shared devices resource and makes sure that a sufficient number of curand states are ...
static Bool_t gInitializeCurand
static AFloat * fOnes
Vector used for summations of columns.
static size_t fNCurandStates
static cublasHandle_t fCublasHandle
static size_t fInstances
Current number of matrix instances.
void InitializeCurandStates()
static size_t fNOnes
Current length of the one vector.
static dim3 BlockDims2D()
static dim3 GridDims2D(int nrows, int ncols)
static int NThreads(const AMatrix &A)
__global__ void CurandInitializationKernel(unsigned long long seed, curandState_t *state)
create variable transformations