Logo ROOT   6.16/01
Reference Guide
Arithmetic.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12////////////////////////////////////////////////////////////
13// Implementation of Helper arithmetic functions for the //
14// multi-threaded CPU implementation of DNNs. //
15////////////////////////////////////////////////////////////
16
19
20#pragma GCC diagnostic push
21#pragma GCC diagnostic ignored "-Wshadow"
22
23#include "tbb/tbb.h"
24
25#pragma GCC diagnostic pop
26
27namespace TMVA
28{
29namespace DNN
30{
31
32//____________________________________________________________________________
33template<typename Real_t>
35 const TCpuMatrix<Real_t> &A,
36 const TCpuMatrix<Real_t> &B)
37{
38 int m = (int) A.GetNrows();
39 int k = (int) A.GetNcols();
40 int n = (int) B.GetNcols();
41
42 R__ASSERT((int) C.GetNrows() == m);
43 R__ASSERT((int) C.GetNcols() == n);
44 R__ASSERT((int) B.GetNrows() == k);
45
46 char transa = 'N';
47 char transb = 'N';
48
49 Real_t alpha = 1.0;
50 Real_t beta = 0.0;
51
52 const Real_t * APointer = A.GetRawDataPointer();
53 const Real_t * BPointer = B.GetRawDataPointer();
54 Real_t * CPointer = C.GetRawDataPointer();
55
56 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
57 APointer, &m, BPointer, &k, &beta, CPointer, &m);
58}
59
60//____________________________________________________________________________
61template<typename Real_t>
63 const TCpuMatrix<Real_t> &A,
64 const TCpuMatrix<Real_t> &B,
65 Real_t alpha, Real_t beta)
66{
67 int m = (int) A.GetNcols();
68 int k = (int) A.GetNrows();
69 int n = (int) B.GetNcols();
70
71 R__ASSERT((int) C.GetNrows() == m);
72 R__ASSERT((int) C.GetNcols() == n);
73 R__ASSERT((int) B.GetNrows() == k);
74
75 char transa = 'T';
76 char transb = 'N';
77
78 //Real_t alpha = 1.0;
79 //Real_t beta = 0.0;
80
81 const Real_t *APointer = A.GetRawDataPointer();
82 const Real_t *BPointer = B.GetRawDataPointer();
83 Real_t *CPointer = C.GetRawDataPointer();
84
85 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
86 APointer, &k, BPointer, &k, &beta, CPointer, &m);
87}
88
89//____________________________________________________________________________
90template<typename Real_t>
92 const TCpuMatrix<Real_t> &A)
93{
94 const Real_t *dataA = A.GetRawDataPointer();
95 Real_t *dataB = B.GetRawDataPointer();
96
97 size_t nElements = A.GetNoElements();
98 R__ASSERT(B.GetNoElements() == nElements);
99 size_t nSteps = TCpuMatrix<Real_t>::GetNWorkItems(nElements);
100
101 auto f = [&](UInt_t workerID)
102 {
103 for (size_t j = 0; j < nSteps; ++j) {
104 size_t idx = workerID+j;
105 if (idx >= nElements) break;
106 dataB[idx] *= dataA[idx];
107 }
108 return 0;
109 };
110
111 if (nSteps < nElements) {
112#ifdef DL_USE_MTE
113 B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
114#else
115 for (size_t i = 0; i < nElements ; i+= nSteps)
116 f(i);
117#endif
118 }
119 else {
120 f(0);
121 }
122}
123
124////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
125/// \brief Checks two matrices for element-wise equality.
126/// \tparam Real_t An architecture-specific floating point number type.
127/// \param A The first matrix.
128/// \param B The second matrix.
129/// \param epsilon Equality tolerance, needed to address floating point arithmetic.
130/// \return Whether the two matrices can be considered equal element-wise
131////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
132template<typename Real_t>
134{
135 if (A.GetNrows() != B.GetNrows() || A.GetNcols() != B.GetNcols()) {
136 Fatal("AlmostEquals", "The passed matrices have unequal shapes.");
137 }
138
139 const Real_t *dataA = A.GetRawDataPointer();
140 const Real_t *dataB = B.GetRawDataPointer();
141 size_t nElements = A.GetNoElements();
142
143 for(size_t i = 0; i < nElements; i++) {
144 if(fabs(dataA[i] - dataB[i]) > epsilon) return false;
145 }
146 return true;
147}
148
149//____________________________________________________________________________
150template<typename Real_t>
152 const TCpuMatrix<Real_t> &A,
153 Real_t alpha, Real_t beta)
154{
155 int m = (int) A.GetNrows();
156 int n = (int) A.GetNcols();
157 int inc = 1;
158
159 // Real_t alpha = 1.0;
160 //Real_t beta = 0.0;
161 char trans = 'T';
162
163 const Real_t * APointer = A.GetRawDataPointer();
164 Real_t * BPointer = B.GetRawDataPointer();
165
166 ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
168 &beta, BPointer, &inc);
169}
170
171//____________________________________________________________________________
172template<typename Real_t>
174 const TCpuMatrix<Real_t> &A,
175 Real_t alpha)
176{
177 int n = (int) (A.GetNcols() * A.GetNrows());
178 int inc = 1;
179
180 const Real_t *x = A.GetRawDataPointer();
181 Real_t *y = B.GetRawDataPointer();
182
183 ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
184}
185
186//____________________________________________________________________________
187template<typename Real_t>
189 const TCpuMatrix<Real_t> &A)
190{
191 auto f = [](Real_t x) {return x;};
192 B.MapFrom(f, A);
193}
194
195
196//____________________________________________________________________________
197template<typename Real_t>
198void TCpu<Real_t>::ScaleAdd(std::vector<TCpuMatrix<Real_t>> &B,
199 const std::vector<TCpuMatrix<Real_t>> &A,
200 Real_t alpha)
201{
202 for (size_t i = 0; i < B.size(); ++i) {
203 ScaleAdd(B[i], A[i], alpha);
204 }
205}
206
207//____________________________________________________________________________
208template<typename Real_t>
209void TCpu<Real_t>::Copy(std::vector<TCpuMatrix<Real_t>> &B,
210 const std::vector<TCpuMatrix<Real_t>> &A)
211{
212 for (size_t i = 0; i < B.size(); ++i) {
213 Copy(B[i], A[i]);
214 }
215}
216
217//____________________________________________________________________________
218template <typename Real_t>
220{
221 auto f = [beta](Real_t x) { return x + beta; };
222 A.Map(f);
223}
224
225//____________________________________________________________________________
226template <typename Real_t>
228{
229 auto f = [beta](Real_t x) { return x * beta; };
230 A.Map(f);
231}
232
233//____________________________________________________________________________
234template <typename Real_t>
236{
237 auto f = [](Real_t x) { return 1.0 / x; };
238 A.Map(f);
239}
240
241//____________________________________________________________________________
242template <typename Real_t>
244{
245 auto f = [](Real_t x) { return x * x; };
246 A.Map(f);
247}
248
249//____________________________________________________________________________
250template <typename Real_t>
252{
253 auto f = [](Real_t x) { return sqrt(x); };
254 A.Map(f);
255}
256
257/// Adam updates
258//____________________________________________________________________________
259template<typename Real_t>
261{
262 // ADAM update the weights.
263 // Weight = Weight - alpha * M / (sqrt(V) + epsilon)
264 Real_t * a = A.GetRawDataPointer();
265 const Real_t * m = M.GetRawDataPointer();
266 const Real_t * v = V.GetRawDataPointer();
267 for (size_t index = 0; index < A.GetNoElements() ; ++index) {
268 a[index] = a[index] - alpha * m[index]/( sqrt(v[index]) + eps);
269 }
270}
271
272//____________________________________________________________________________
273template<typename Real_t>
275{
276 // First momentum weight gradient update for ADAM
277 // Mt = beta1 * Mt-1 + (1-beta1) * WeightGradients
278 Real_t * a = A.GetRawDataPointer();
279 const Real_t * b = B.GetRawDataPointer();
280 for (size_t index = 0; index < A.GetNoElements() ; ++index) {
281 a[index] = beta * a[index] + (1.-beta) * b[index];
282 }
283}
284//____________________________________________________________________________
285template<typename Real_t>
287{
288 // Second momentum weight gradient update for ADAM
289 // Vt = beta2 * Vt-1 + (1-beta2) * WeightGradients^2
290 Real_t * a = A.GetRawDataPointer();
291 const Real_t * b = B.GetRawDataPointer();
292 for (size_t index = 0; index < A.GetNoElements() ; ++index) {
293 a[index] = beta * a[index] + (1.-beta) * b[index] * b[index];
294 }
295}
296
297
298} // DNN
299} // TMVA
SVector< double, 2 > v
Definition: Dict.h:5
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
float Real_t
Definition: RtypesCore.h:64
unsigned int UInt_t
Definition: RtypesCore.h:42
#define R__ASSERT(e)
Definition: TError.h:96
void Fatal(const char *location, const char *msgfmt,...)
double sqrt(double)
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
The TCpuMatrix class.
Definition: CpuMatrix.h:89
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:152
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:180
static void Hadamard(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
Definition: Arithmetic.cxx:91
static void Copy(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
static void AdamUpdateSecondMom(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta)
Definition: Arithmetic.cxx:286
static void TransposeMultiply(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
Definition: Arithmetic.cxx:62
static void AdamUpdateFirstMom(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta)
Definition: Arithmetic.cxx:274
static void AdamUpdate(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &M, const TCpuMatrix< Scalar_t > &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
Definition: Arithmetic.cxx:260
static void ConstAdd(TCpuMatrix< Scalar_t > &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.cxx:219
static void SqrtElementWise(TCpuMatrix< Scalar_t > &A)
Square root each element of the matrix A and write the result into A.
Definition: Arithmetic.cxx:251
static void Multiply(TCpuMatrix< Scalar_t > &C, const TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
Standard multiplication of two matrices A and B with the result being written into C.
Definition: Arithmetic.cxx:34
static bool AlmostEquals(const TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
Definition: Arithmetic.cxx:133
static void SumColumns(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrixx A and write the results into the first m elements in A.
Definition: Arithmetic.cxx:151
static void ReciprocalElementWise(TCpuMatrix< Scalar_t > &A)
Reciprocal each element of the matrix A and write the result into A.
Definition: Arithmetic.cxx:235
static void ScaleAdd(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
static void ConstMult(TCpuMatrix< Scalar_t > &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.cxx:227
static void SquareElementWise(TCpuMatrix< Scalar_t > &A)
Square each element of the matrix A and write the result into A.
Definition: Arithmetic.cxx:243
double beta(double x, double y)
Calculates the beta function.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
static double B[]
static double A[]
static double C[]
void Copy(void *source, void *dest)
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
void Gemv(const char *trans, const int *m, const int *n, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *x, const int *incx, const Real_t *beta, Real_t *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.
void Axpy(const int *n, const Real_t *alpha, const Real_t *x, const int *incx, Real_t *y, const int *incy)
Add the vector x scaled by alpha to y scaled by \beta.
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
Abstract ClassifierFactory template that handles arbitrary types.
auto * m
Definition: textangle.C:8
auto * a
Definition: textangle.C:12
REAL epsilon
Definition: triangle.c:617