36 float alpha = 1.0, beta = 0.0;
43 CUBLAS_OP_N, CUBLAS_OP_N,
48 C.GetDataPointer(),
m);
50 C.SetComputeStream(s);
63 double alpha = 1.0, beta = 0.0;
70 CUBLAS_OP_N, CUBLAS_OP_N,
75 C.GetDataPointer(),
m);
77 C.SetComputeStream(s);
85 float alpha,
float beta)
98 CUBLAS_OP_T, CUBLAS_OP_N,
103 C.GetDataPointer(),
m);
105 C.SetComputeStream(s);
112 double alpha,
double beta)
125 CUBLAS_OP_T, CUBLAS_OP_N,
130 C.GetDataPointer(),
m);
132 C.SetComputeStream(s);
136template<
typename AFloat>
143 ::TMVA::DNN::Cuda::Hadamard<<<gridDims, blockDims, 0, s>>>(B.
GetDataPointer(),
150template<
typename AFloat>
164 ::TMVA::DNN::Cuda::Hadamard<<<gridDims, blockDims, 0, s>>>(B.
GetDataPointer(),
171template<
typename AFloat>
179 ::TMVA::DNN::Cuda::ReduceMatrix<<<gridDims, blockDims, 0, s>>>(
191 float alpha,
float beta)
215 double alpha,
double beta)
242 float alpha = 1.0, beta = 0.0;
265 double alpha = 1.0, beta = 0.0;
290template<
typename AFloat>
294 Fatal(
"AlmostEquals",
"The passed matrices have unequal shapes.");
302 cudaMalloc((
void**) &dResult,
sizeof(
bool));
303 cudaMemset(dResult, 1,
sizeof(
bool));
309 cudaMemcpy(&
result, dResult,
sizeof(
bool), cudaMemcpyDeviceToHost);
342template<
typename AFloat>
351 ScaleAdd(B_m, A_m, alpha);
356template<
typename AFloat>
362 ::TMVA::DNN::Cuda::ConstAdd<<<gridDims, blockDims, 0, s>>>(
370template<
typename AFloat>
376 ::TMVA::DNN::Cuda::ConstMult<<<gridDims, blockDims, 0, s>>>(
384template<
typename AFloat>
390 ::TMVA::DNN::Cuda::ReciprocalElementWise<<<gridDims, blockDims, 0, s>>>(
397template<
typename AFloat>
403 ::TMVA::DNN::Cuda::SquareElementWise<<<gridDims, blockDims, 0, s>>>(
410template<
typename AFloat>
416 ::TMVA::DNN::Cuda::SqrtElementWise<<<gridDims, blockDims, 0, s>>>(
424template<
typename AFloat>
430 ::TMVA::DNN::Cuda::AdamUpdate<<<gridDims, blockDims, 0, s>>>(
440template<
typename AFloat>
446 ::TMVA::DNN::Cuda::AdamUpdateFirstMom<<<gridDims, blockDims, 0, s>>>(
454template<
typename AFloat>
460 ::TMVA::DNN::Cuda::AdamUpdateSecondMom<<<gridDims, blockDims, 0, s>>>(
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
static AFloat GetDeviceReturn()
Transfer the value in the device return buffer to the host.
void SetComputeStream(cudaStream_t stream)
cudaStream_t GetComputeStream() const
size_t GetNoElements() const
static AFloat * GetDeviceReturnPointer()
Return device pointer to the device return buffer.
const cublasHandle_t & GetCublasHandle() const
static void ResetDeviceReturn(AFloat value=0.0)
Set the return buffer on the device to the specified value.
const AFloat * GetDataPointer() const
TCudaTensor< AFloat > At(size_t i) const
cudaStream_t GetComputeStream() const
const AFloat * GetDataPointer() const
size_t GetFirstStride() const
void SetComputeStream(cudaStream_t stream)
size_t GetFirstSize() const
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void SqrtElementWise(Matrix_t &A)
Take the square root of each element of the matrix A and write the result into A.
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum the columns of the (m x n) matrix A and write the results into the first m elements in B.
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void ReciprocalElementWise(Matrix_t &A)
Take the reciprocal of each element of the matrix A and write the result into A.
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply all the elements of matrix A by the constant beta and write the result into A.
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B, scaled by beta, to the elements in matrix A.
static dim3 BlockDims2D()
static dim3 GridDims2D(int nrows, int ncols)
create variable transformations