Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
Arithmetic.hxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12////////////////////////////////////////////////////////////
13// Implementation of Helper arithmetic functions for the //
14// multi-threaded CPU implementation of DNNs. //
15////////////////////////////////////////////////////////////
16
18
19#ifdef R__HAS_TMVACPU
21#else
23#include "TVectorT.h"
24#endif
25
26#if defined(__GNUC__)
27#pragma GCC diagnostic push
28#pragma GCC diagnostic ignored "-Wshadow"
29
30//#include "tbb/tbb.h"
31
32#pragma GCC diagnostic pop
33#endif
34
35namespace TMVA
36{
37namespace DNN
38{
39
40//____________________________________________________________________________
41template<typename AReal>
43 const TCpuMatrix<AReal> &A,
44 const TCpuMatrix<AReal> &B)
45{
46 int m = (int) A.GetNrows();
47 int k = (int) A.GetNcols();
48 int n = (int) B.GetNcols();
49
50 R__ASSERT((int) C.GetNrows() == m);
51 R__ASSERT((int) C.GetNcols() == n);
52 R__ASSERT((int) B.GetNrows() == k);
53
54#ifdef R__HAS_TMVACPU
55
56 char transa = 'N';
57 char transb = 'N';
58
59 AReal alpha = 1.0;
60 AReal beta = 0.0;
61
62 const AReal * APointer = A.GetRawDataPointer();
63 const AReal * BPointer = B.GetRawDataPointer();
64 AReal * CPointer = C.GetRawDataPointer();
65
66 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
67 APointer, &m, BPointer, &k, &beta, CPointer, &m);
68#else
69 TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
70 tmp.Mult(A,B);
71 C = tmp;
72#endif
73}
74
75//____________________________________________________________________________
76template<typename AReal>
78 const TCpuMatrix<AReal> &A,
79 const TCpuMatrix<AReal> &B,
80 AReal alpha, AReal beta)
81{
82#ifdef R__HAS_TMVACPU
83 int m = (int) A.GetNcols();
84 int k = (int) A.GetNrows();
85 int n = (int) B.GetNcols();
86
87 R__ASSERT((int) C.GetNrows() == m);
88 R__ASSERT((int) C.GetNcols() == n);
89 R__ASSERT((int) B.GetNrows() == k);
90
91 char transa = 'T';
92 char transb = 'N';
93
94 //AReal alpha = 1.0;
95 //AReal beta = 0.0;
96
97 const AReal *APointer = A.GetRawDataPointer();
98 const AReal *BPointer = B.GetRawDataPointer();
99 AReal *CPointer = C.GetRawDataPointer();
100
101 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
102 APointer, &k, BPointer, &k, &beta, CPointer, &m);
103#else
104 TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
105 tmp.TMult(A, B);
106 tmp = alpha * tmp;
107 if (beta != 0.0) {
108 TMatrixT<AReal> tmp0(C);
109 tmp = tmp + beta * tmp0;
110 }
111 C = tmp;
112#endif
113}
114
115//____________________________________________________________________________
116template<typename AReal>
118 const TCpuMatrix<AReal> &A)
119{
120 const AReal *dataA = A.GetRawDataPointer();
121 AReal *dataB = B.GetRawDataPointer();
122
123 size_t nElements = A.GetNoElements();
124 R__ASSERT(B.GetNoElements() == nElements);
125 size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
126
127 auto f = [&](UInt_t workerID)
128 {
129 for (size_t j = 0; j < nSteps; ++j) {
130 size_t idx = workerID+j;
131 if (idx >= nElements) break;
132 dataB[idx] *= dataA[idx];
133 }
134 return 0;
135 };
136
137 if (nSteps < nElements) {
138#ifdef DL_USE_MTE
139 B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
140#else
141 for (size_t i = 0; i < nElements ; i+= nSteps)
142 f(i);
143#endif
144 }
145 else {
146 f(0);
147 }
148}
149
150//____________________________________________________________________________
151template<typename AReal>
153 const TCpuTensor<AReal> &A)
154{
155 const AReal *dataA = A.GetRawDataPointer();
156 AReal *dataB = B.GetRawDataPointer();
157
158 size_t nElements = A.GetNoElements();
159 R__ASSERT(B.GetNoElements() == nElements);
160 size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
161
162 auto f = [&](UInt_t workerID)
163 {
164 for (size_t j = 0; j < nSteps; ++j) {
165 size_t idx = workerID+j;
166 if (idx >= nElements) break;
167 dataB[idx] *= dataA[idx];
168 }
169 return 0;
170 };
171
172 if (nSteps < nElements) {
173#ifdef DL_USE_MTE
175#else
176 for (size_t i = 0; i < nElements ; i+= nSteps)
177 f(i);
178#endif
179 }
180 else {
181 f(0);
182 }
183}
184
185////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
186/// \brief Checks two matrices for element-wise equality.
187/// \tparam AReal An architecture-specific floating point number type.
188/// \param A The first matrix.
189/// \param B The second matrix.
190/// \param epsilon Equality tolerance, needed to address floating point arithmetic.
191/// \return Whether the two matrices can be considered equal element-wise
192////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
193template<typename AReal>
195{
196 if (A.GetNrows() != B.GetNrows() || A.GetNcols() != B.GetNcols()) {
197 Fatal("AlmostEquals", "The passed matrices have unequal shapes.");
198 }
199
200 const AReal *dataA = A.GetRawDataPointer();
201 const AReal *dataB = B.GetRawDataPointer();
202 size_t nElements = A.GetNoElements();
203
204 for(size_t i = 0; i < nElements; i++) {
205 if(fabs(dataA[i] - dataB[i]) > epsilon) return false;
206 }
207 return true;
208}
209
210//____________________________________________________________________________
211template<typename AReal>
213 const TCpuMatrix<AReal> &A,
214 AReal alpha, AReal beta)
215{
216
217 int m = (int) A.GetNrows();
218 int n = (int) A.GetNcols();
219
220 assert((int) B.GetNoElements() >= n);
221
222#ifdef R__HAS_TMVACPU
223 int inc = 1;
224 char trans = 'T';
225
226 const AReal * APointer = A.GetRawDataPointer();
227 AReal * BPointer = B.GetRawDataPointer();
228
229 // compute B = alpha * A * I + beta * B
230
231 ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
233 &beta, BPointer, &inc);
234#else
235 TMatrixT<AReal> tA(A);
236 tA.T();
239 assert(B.GetNrows() == 1 || B.GetNcols() == 1);
240 tmp = alpha * tA * ones + beta * tmp;
241 // copy result buffer in B matrix
242 std::copy(tmp.GetMatrixArray(), tmp.GetMatrixArray() + n, B.GetRawDataPointer());
243#endif
244}
245
246//____________________________________________________________________________
247template<typename AReal>
249 const TCpuMatrix<AReal> &A,
250 AReal alpha)
251{
252#ifdef R__HAS_TMVACPU
253 int n = (int) (A.GetNcols() * A.GetNrows());
254 int inc = 1;
255
256 const AReal *x = A.GetRawDataPointer();
257 AReal *y = B.GetRawDataPointer();
258
259 ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
260#else
261 TMatrixT<AReal> tmp(B);
262 TReference<AReal>::ScaleAdd(tmp, A, alpha);
263 B = tmp;
264#endif
265}
266
267//____________________________________________________________________________
268template<typename AReal>
270 const TCpuMatrix<AReal> &A)
271{
272 auto f = [](AReal x) {return x;};
273 B.MapFrom(f, A);
274}
275
276
277//____________________________________________________________________________
278template<typename AReal>
280 const TCpuTensor<AReal> &A,
281 AReal alpha)
282{
283 // should re-implemented at tensor level
284 for (size_t i = 0; i < B.GetFirstSize(); ++i) {
285 TCpuMatrix<AReal> B_m = B.At(i).GetMatrix();
286 ScaleAdd(B_m, A.At(i).GetMatrix(), alpha);
287 }
288}
289
290//____________________________________________________________________________
291template<typename AReal>
293 const TCpuTensor<AReal> &A)
294{
295
296 auto f = [](AReal x) {return x;};
297 B.MapFrom(f, A);
298}
299
300//____________________________________________________________________________
301template <typename AReal>
303{
304 auto f = [beta](AReal x) { return x + beta; };
305 A.Map(f);
306}
307
308//____________________________________________________________________________
309template <typename AReal>
311{
312 auto f = [beta](AReal x) { return x * beta; };
313 A.Map(f);
314}
315
316//____________________________________________________________________________
317template <typename AReal>
319{
320 auto f = [](AReal x) { return 1.0 / x; };
321 A.Map(f);
322}
323
324//____________________________________________________________________________
325template <typename AReal>
327{
328 auto f = [](AReal x) { return x * x; };
329 A.Map(f);
330}
331
332//____________________________________________________________________________
333template <typename AReal>
335{
336 auto f = [](AReal x) { return sqrt(x); };
337 A.Map(f);
338}
339
340/// Adam updates
341//____________________________________________________________________________
342template<typename AReal>
343void TCpu<AReal>::AdamUpdate(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & M, const TCpuMatrix<AReal> & V, AReal alpha, AReal eps)
344{
345 // ADAM update the weights.
346 // Weight = Weight - alpha * M / (sqrt(V) + epsilon)
347 AReal * a = A.GetRawDataPointer();
348 const AReal * m = M.GetRawDataPointer();
349 const AReal * v = V.GetRawDataPointer();
350 for (size_t index = 0; index < A.GetNoElements() ; ++index) {
351 a[index] = a[index] - alpha * m[index]/( sqrt(v[index]) + eps);
352 }
353}
354
355//____________________________________________________________________________
356template<typename AReal>
358{
359 // First momentum weight gradient update for ADAM
360 // Mt = beta1 * Mt-1 + (1-beta1) * WeightGradients
361 AReal * a = A.GetRawDataPointer();
362 const AReal * b = B.GetRawDataPointer();
363 for (size_t index = 0; index < A.GetNoElements() ; ++index) {
364 a[index] = beta * a[index] + (1.-beta) * b[index];
365 }
366}
367//____________________________________________________________________________
368template<typename AReal>
370{
371 // Second momentum weight gradient update for ADAM
372 // Vt = beta2 * Vt-1 + (1-beta2) * WeightGradients^2
373 AReal * a = A.GetRawDataPointer();
374 const AReal * b = B.GetRawDataPointer();
375 for (size_t index = 0; index < A.GetNoElements() ; ++index) {
376 a[index] = beta * a[index] + (1.-beta) * b[index] * b[index];
377 }
378}
379
380
381} // DNN
382} // TMVA
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define a(i)
Definition RSha256.hxx:99
#define R__ASSERT(e)
Definition TError.h:120
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Definition TError.cxx:245
double sqrt(double)
A pseudo container class which is a generator of indices.
Definition TSeq.hxx:66
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition Config.h:83
static Config & Instance()
static function: returns TMVA instance
Definition Config.cxx:106
The TCpuMatrix class.
Definition CpuMatrix.h:86
size_t GetNcols() const
Definition CpuMatrix.h:156
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as maps but takes the input values from the matrix A and writes the results in this matrix.
Definition CpuMatrix.h:241
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition CpuMatrix.h:166
static size_t GetNWorkItems(size_t nelements)
Definition CpuMatrix.h:191
size_t GetNrows() const
Definition CpuMatrix.h:155
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition CpuMatrix.h:212
static Executor & GetThreadExecutor()
Definition CpuMatrix.h:169
size_t GetNoElements() const
Definition CpuMatrix.h:157
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition CpuTensor.h:141
size_t GetNoElements() const
Definition CpuTensor.h:149
void MapFrom(Function_t &f, const TCpuTensor< AFloat > &A)
Same as maps but takes the input values from the tensor A and writes the results in this tensor.
Definition CpuTensor.h:354
size_t GetFirstSize() const
Definition CpuTensor.h:155
TCpuTensor< AFloat > At(size_t i)
Definition CpuTensor.h:221
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrixx A and write the results into the first m elements in A.
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition Executor.h:111
TMatrixT.
Definition TMatrixT.h:39
TMatrixT< Element > & T()
Definition TMatrixT.h:150
void TMult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
Create a matrix C such that C = A' * B.
Definition TMatrixT.cxx:853
void Mult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
General matrix multiplication. Create a matrix C such that C = A * B.
Definition TMatrixT.cxx:649
TVectorT.
Definition TVectorT.h:27
Element * GetMatrixArray()
Definition TVectorT.h:78
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
void Axpy(const int *n, const AReal *alpha, const AReal *x, const int *incx, AReal *y, const int *incy)
Add the vector x scaled by alpha to y scaled by \beta.
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const AReal *alpha, const AReal *A, const int *lda, const AReal *B, const int *ldb, const AReal *beta, AReal *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
void Gemv(const char *trans, const int *m, const int *n, const AReal *alpha, const AReal *A, const int *lda, const AReal *x, const int *incx, const AReal *beta, AReal *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.
create variable transformations
auto * m
Definition textangle.C:8
REAL epsilon
Definition triangle.c:617