Logo ROOT   6.14/05
Reference Guide
Arithmetic.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 20/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////////////
13 // Implementation of Helper arithmetic functions for the //
14 // multi-threaded CPU implementation of DNNs. //
15 ////////////////////////////////////////////////////////////
16 
19 #include "tbb/tbb.h"
20 
21 namespace TMVA
22 {
23 namespace DNN
24 {
25 
26 //____________________________________________________________________________
// Matrix product of A and B written into C through one Blas::Gemm call.
// Both operands are passed untransposed, with alpha = 1 and beta = 0.
// Assuming the standard BLAS gemm contract (C := alpha*A*B + beta*C), the
// previous contents of C are discarded.
// Preconditions (checked with R__ASSERT): C is (rows of A) x (cols of B) and
// B has as many rows as A has columns.
// NOTE(review): listing line 28, which carries the function name and the first
// parameter (C), is missing from this listing; the reference entry for
// Arithmetic.cxx:28 gives it as Multiply(C, A, B) -- confirm against the file.
27 template<typename Real_t>
29  const TCpuMatrix<Real_t> &A,
30  const TCpuMatrix<Real_t> &B)
31 {
32  int m = (int) A.GetNrows(); // rows of A, expected rows of C
33  int k = (int) A.GetNcols(); // shared inner dimension
34  int n = (int) B.GetNcols(); // columns of B, expected columns of C
35 
36  R__ASSERT((int) C.GetNrows() == m);
37  R__ASSERT((int) C.GetNcols() == n);
38  R__ASSERT((int) B.GetNrows() == k);
39 
40  char transa = 'N'; // A is passed untransposed
41  char transb = 'N'; // B is passed untransposed
42 
43  Real_t alpha = 1.0;
44  Real_t beta = 0.0;
45 
46  const Real_t * APointer = A.GetRawDataPointer();
47  const Real_t * BPointer = B.GetRawDataPointer();
48  Real_t * CPointer = C.GetRawDataPointer();
49 
   // Leading dimensions passed below: m for A, k for B, m for C.
50  ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
51  APointer, &m, BPointer, &k, &beta, CPointer, &m);
52 }
53 
54 //____________________________________________________________________________
// Product of the transposed matrix A with B, written into C through one
// Blas::Gemm call (transa = 'T', transb = 'N'). alpha and beta are forwarded
// unchanged; assuming the standard BLAS gemm contract this computes
// C := alpha * A^T * B + beta * C.
// Preconditions (checked with R__ASSERT): C is (cols of A) x (cols of B) and
// B has as many rows as A.
// NOTE(review): listing line 56, which carries the function name and the first
// parameter (C), is missing from this listing; the reference entry for
// Arithmetic.cxx:56 gives it as TransposeMultiply -- confirm against the file.
55 template<typename Real_t>
57  const TCpuMatrix<Real_t> &A,
58  const TCpuMatrix<Real_t> &B,
59  Real_t alpha, Real_t beta)
60 {
61  int m = (int) A.GetNcols(); // rows of A^T, expected rows of C
62  int k = (int) A.GetNrows(); // shared inner dimension
63  int n = (int) B.GetNcols(); // columns of B, expected columns of C
64 
65  R__ASSERT((int) C.GetNrows() == m);
66  R__ASSERT((int) C.GetNcols() == n);
67  R__ASSERT((int) B.GetNrows() == k);
68 
69  char transa = 'T'; // A is transposed inside Gemm
70  char transb = 'N'; // B is passed untransposed
71 
   // alpha and beta are caller-supplied parameters, hence the disabled locals.
72  //Real_t alpha = 1.0;
73  //Real_t beta = 0.0;
74 
75  const Real_t *APointer = A.GetRawDataPointer();
76  const Real_t *BPointer = B.GetRawDataPointer();
77  Real_t *CPointer = C.GetRawDataPointer();
78 
   // Leading dimensions passed below: k for A (its stored row count), k for B, m for C.
79  ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
80  APointer, &k, BPointer, &k, &beta, CPointer, &m);
81 }
82 
83 //____________________________________________________________________________
// In-place element-wise product: every element of B is multiplied by the
// element of A at the same linear index. Both matrices must hold the same
// number of elements (checked with R__ASSERT).
// The index range is cut into chunks of nSteps elements; with DL_USE_MTE the
// chunks are handed to B's thread executor, otherwise they run sequentially.
// NOTE(review): listing line 85, which carries the function name and the first
// parameter (B), is missing from this listing; the reference entry for
// Arithmetic.cxx:85 gives it as Hadamard -- confirm against the file.
84 template<typename Real_t>
86  const TCpuMatrix<Real_t> &A)
87 {
88  const Real_t *dataA = A.GetRawDataPointer();
89  Real_t *dataB = B.GetRawDataPointer();
90 
91  size_t nElements = A.GetNElements();
92  R__ASSERT(B.GetNElements() == nElements);
93  size_t nSteps = TCpuMatrix<Real_t>::GetNWorkItems(nElements); // chunk length per call of f
94 
   // f handles up to nSteps consecutive elements starting at index workerID.
95  auto f = [&](UInt_t workerID)
96  {
97  for (size_t j = 0; j < nSteps; ++j) {
98  size_t idx = workerID+j;
99  if (idx >= nElements) break; // the last chunk may be shorter than nSteps
100  dataB[idx] *= dataA[idx];
101  }
102  return 0;
103  };
104 
105 #ifdef DL_USE_MTE
106  B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
107 #else
   // Sequential fallback: same chunk start indices, executed one after another.
   // NOTE(review): i is a size_t but f takes a UInt_t, so the start index is
   // narrowed at the call.
108  for (size_t i = 0; i < nElements ; i+= nSteps)
109  f(i);
110 #endif
111 }
112 
113 //____________________________________________________________________________
// Calls Blas::Gemv with trans = 'T' on the m x n matrix A (leading dimension
// m) and writes the result into B with unit stride. alpha and beta are
// forwarded unchanged.
// NOTE(review): listing line 115 (function name and first parameter, B) and
// listing line 131 (the x / incx arguments that the Gemv signature requires)
// are missing from this listing. The reference entry for Arithmetic.cxx:115
// names the function SumColumns; what vector is passed as x cannot be seen
// here -- confirm against the file.
114 template<typename Real_t>
116  const TCpuMatrix<Real_t> &A,
117  Real_t alpha, Real_t beta)
118 {
119  int m = (int) A.GetNrows();
120  int n = (int) A.GetNcols();
121  int inc = 1; // unit stride for the output vector
122 
   // alpha and beta are caller-supplied parameters, hence the disabled locals.
123  // Real_t alpha = 1.0;
124  //Real_t beta = 0.0;
125  char trans = 'T'; // Gemv operates on the transpose of A
126 
127  const Real_t * APointer = A.GetRawDataPointer();
128  Real_t * BPointer = B.GetRawDataPointer();
129 
130  ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
132  &beta, BPointer, &inc);
133 }
134 
135 //____________________________________________________________________________
// Passes the raw data of A (as x) and B (as y) to Blas::Axpy over all
// rows*cols elements of A with unit stride. Assuming the standard BLAS axpy
// contract this performs B := alpha * A + B.
// NOTE(review): nothing here checks that B holds at least as many elements as
// A -- confirm that callers guarantee matching shapes.
// NOTE(review): listing line 137, which carries the function name and the
// first parameter (B), is missing from this listing; a ScaleAdd reference
// entry appears below, but with different parameter names -- confirm against
// the file.
136 template<typename Real_t>
138  const TCpuMatrix<Real_t> &A,
139  Real_t alpha)
140 {
141  int n = (int) (A.GetNcols() * A.GetNrows()); // total element count of A
142  int inc = 1; // unit stride for both vectors
143 
144  const Real_t *x = A.GetRawDataPointer();
145  Real_t *y = B.GetRawDataPointer();
146 
147  ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
148 }
149 
150 //____________________________________________________________________________
// Copies A into B by calling B.MapFrom with an identity function, so each
// value read from A is written to B unchanged.
// NOTE(review): listing line 152, which carries the function name and the
// first parameter (B), is missing from this listing; the reference entry
// below lists Copy(B, A) -- confirm against the file.
151 template<typename Real_t>
153  const TCpuMatrix<Real_t> &A)
154 {
155  auto f = [](Real_t x) {return x;}; // identity map
156  B.MapFrom(f, A);
157 }
158 
159 
160 //____________________________________________________________________________
161 template<typename Real_t>
162 void TCpu<Real_t>::ScaleAdd(std::vector<TCpuMatrix<Real_t>> &B,
163  const std::vector<TCpuMatrix<Real_t>> &A,
164  Real_t alpha)
165 {
166  for (size_t i = 0; i < B.size(); ++i) {
167  ScaleAdd(B[i], A[i], alpha);
168  }
169 }
170 
171 //____________________________________________________________________________
172 template<typename Real_t>
173 void TCpu<Real_t>::Copy(std::vector<TCpuMatrix<Real_t>> &B,
174  const std::vector<TCpuMatrix<Real_t>> &A)
175 {
176  for (size_t i = 0; i < B.size(); ++i) {
177  Copy(B[i], A[i]);
178  }
179 }
180 
181 
182 } // DNN
183 } // TMVA
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
static double B[]
static void ScaleAdd(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements of matrix B, scaled by beta, to the elements of matrix A.
The TCpuMatrix class.
Definition: CpuMatrix.h:72
auto * m
Definition: textangle.C:8
static void SumColumns(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in A...
Definition: Arithmetic.cxx:115
static void Copy(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
size_t GetNcols() const
Definition: CpuMatrix.h:127
#define R__ASSERT(e)
Definition: TError.h:96
#define f(i)
Definition: RSha256.hxx:104
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:164
static double A[]
double beta(double x, double y)
Calculates the beta function.
size_t GetNElements() const
Definition: CpuMatrix.h:128
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
Double_t x[n]
Definition: legend1.C:17
static void Multiply(TCpuMatrix< Scalar_t > &C, const TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
Standard multiplication of two matrices A and B with the result being written into C...
Definition: Arithmetic.cxx:28
static void Hadamard(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A...
Definition: Arithmetic.cxx:85
void Axpy(const int *n, const Real_t *alpha, const Real_t *x, const int *incx, Real_t *y, const int *incy)
Add the vector x scaled by alpha to the vector y (y := alpha * x + y).
static double C[]
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as maps but takes the input values from the matrix A and writes the results in this matrix...
Definition: CpuMatrix.h:205
unsigned int UInt_t
Definition: RtypesCore.h:42
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:139
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:136
void Gemv(const char *trans, const int *m, const int *n, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *x, const int *incx, const Real_t *beta, Real_t *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.
void Copy(void *source, void *dest)
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Double_t y[n]
Definition: legend1.C:17
float Real_t
Definition: RtypesCore.h:64
Abstract ClassifierFactory template that handles arbitrary types.
size_t GetNrows() const
Definition: CpuMatrix.h:126
static void TransposeMultiply(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of the transposed matrix A^T and the matrix B, with the result being written into C...
Definition: Arithmetic.cxx:56
const Int_t n
Definition: legend1.C:16