20#pragma GCC diagnostic push
21#pragma GCC diagnostic ignored "-Wshadow"
25#pragma GCC diagnostic pop
33template<
typename Real_t>
38 int m = (int)
A.GetNrows();
39 int k = (int)
A.GetNcols();
40 int n = (int)
B.GetNcols();
52 const Real_t * APointer =
A.GetRawDataPointer();
53 const Real_t * BPointer =
B.GetRawDataPointer();
54 Real_t * CPointer =
C.GetRawDataPointer();
57 APointer, &
m, BPointer, &k, &
beta, CPointer, &
m);
61template<
typename Real_t>
67 int m = (int)
A.GetNcols();
68 int k = (int)
A.GetNrows();
69 int n = (int)
B.GetNcols();
81 const Real_t *APointer =
A.GetRawDataPointer();
82 const Real_t *BPointer =
B.GetRawDataPointer();
83 Real_t *CPointer =
C.GetRawDataPointer();
86 APointer, &k, BPointer, &k, &
beta, CPointer, &
m);
90template<
typename Real_t>
94 const Real_t *dataA =
A.GetRawDataPointer();
95 Real_t *dataB =
B.GetRawDataPointer();
97 size_t nElements =
A.GetNoElements();
103 for (
size_t j = 0; j < nSteps; ++j) {
104 size_t idx = workerID+j;
105 if (idx >= nElements)
break;
106 dataB[idx] *= dataA[idx];
111 if (nSteps < nElements) {
113 B.GetThreadExecutor().Foreach(
f,
ROOT::TSeqI(0,nElements,nSteps));
115 for (
size_t i = 0; i < nElements ; i+= nSteps)
132template<
typename Real_t>
135 if (
A.GetNrows() !=
B.GetNrows() ||
A.GetNcols() !=
B.GetNcols()) {
136 Fatal(
"AlmostEquals",
"The passed matrices have unequal shapes.");
139 const Real_t *dataA =
A.GetRawDataPointer();
140 const Real_t *dataB =
B.GetRawDataPointer();
141 size_t nElements =
A.GetNoElements();
143 for(
size_t i = 0; i < nElements; i++) {
144 if(
fabs(dataA[i] - dataB[i]) >
epsilon)
return false;
150template<
typename Real_t>
155 int m = (int)
A.GetNrows();
156 int n = (int)
A.GetNcols();
163 const Real_t * APointer =
A.GetRawDataPointer();
164 Real_t * BPointer =
B.GetRawDataPointer();
168 &
beta, BPointer, &inc);
172template<
typename Real_t>
177 int n = (int) (
A.GetNcols() *
A.GetNrows());
180 const Real_t *
x =
A.GetRawDataPointer();
187template<
typename Real_t>
197template<
typename Real_t>
199 const std::vector<TCpuMatrix<Real_t>> &
A,
202 for (
size_t i = 0; i <
B.size(); ++i) {
203 ScaleAdd(
B[i],
A[i], alpha);
208template<
typename Real_t>
210 const std::vector<TCpuMatrix<Real_t>> &
A)
212 for (
size_t i = 0; i <
B.size(); ++i) {
218template <
typename Real_t>
226template <
typename Real_t>
234template <
typename Real_t>
237 auto f = [](
Real_t x) {
return 1.0 /
x; };
242template <
typename Real_t>
250template <
typename Real_t>
259template<
typename Real_t>
267 for (
size_t index = 0; index <
A.GetNoElements() ; ++index) {
268 a[index] =
a[index] - alpha *
m[index]/(
sqrt(
v[index]) + eps);
273template<
typename Real_t>
279 const Real_t *
b =
B.GetRawDataPointer();
280 for (
size_t index = 0; index <
A.GetNoElements() ; ++index) {
285template<
typename Real_t>
291 const Real_t *
b =
B.GetRawDataPointer();
292 for (
size_t index = 0; index <
A.GetNoElements() ; ++index) {
293 a[index] =
beta *
a[index] + (1.-
beta) *
b[index] *
b[index];
void Fatal(const char *location, const char *msgfmt,...)
A pseudo container class which is a generator of indices.
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
static size_t GetNWorkItems(size_t nelements)
static void Hadamard(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void Copy(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
static void AdamUpdateSecondMom(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta)
static void TransposeMultiply(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void AdamUpdateFirstMom(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta)
static void AdamUpdate(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &M, const TCpuMatrix< Scalar_t > &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
static void ConstAdd(TCpuMatrix< Scalar_t > &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void SqrtElementWise(TCpuMatrix< Scalar_t > &A)
Square root each element of the matrix A and write the result into A.
static void Multiply(TCpuMatrix< Scalar_t > &C, const TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
Standard multiplication of two matrices A and B with the result being written into C.
static bool AlmostEquals(const TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void SumColumns(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrixx A and write the results into the first m elements in A.
static void ReciprocalElementWise(TCpuMatrix< Scalar_t > &A)
Reciprocal each element of the matrix A and write the result into A.
static void ScaleAdd(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
static void ConstMult(TCpuMatrix< Scalar_t > &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
static void SquareElementWise(TCpuMatrix< Scalar_t > &A)
Square each element of the matrix A and write the result into A.
double beta(double x, double y)
Calculates the beta function.
void Copy(void *source, void *dest)
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
void Gemv(const char *trans, const int *m, const int *n, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *x, const int *incx, const Real_t *beta, Real_t *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.
void Axpy(const int *n, const Real_t *alpha, const Real_t *x, const int *incx, Real_t *y, const int *incy)
Add the vector x scaled by alpha to y scaled by \beta.
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
create variable transformations