Logo ROOT  
Reference Guide
CpuMatrix.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12//////////////////////////////////////////////////////////
13// Definition of the CpuMatrix class used to represent //
14// weight and bias matrices in neural nets. //
15//////////////////////////////////////////////////////////
16
17#ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
18#define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
19
20#ifdef R__USE_IMT
21#define DL_USE_MTE // use MT with tbb
22#endif
23
24#include <cstddef>
25#include <vector>
26
27#include "TMatrix.h"
28#include "TMVA/Config.h"
29#include "CpuBuffer.h"
30
31// #define DEBUG_TMVA_TCPUMATRIX
32#if defined(DEBUG_TMVA_TCPUMATRIX)
/*
 * Debug(!) macro for printing matrices to std::cout.
 *
 * Prints a header line containing `text`, the stringified argument
 * expression (`#mat`) and the matrix dimensions, followed by one
 * comma-separated line per matrix row. E.g.
 * `TMVA_DNN_PrintTCpuMatrix(matA, "Test")` _could_ generate
 * "matA is null pointer" -- in that case the process is terminated with
 * exit(1).
 *
 * The argument expression is evaluated exactly once and bound to a
 * reference, so expressions such as `*ptr` or calls with side effects are
 * safe to pass.
 *
 * Note: This is a preprocessor macro. It does _not_ respect namespaces.
 * std::cout, std::endl and exit must be visible at the expansion site.
 *
 * @param mat  Matrix to print; must provide GetRawDataPointer(), GetNrows(),
 *             GetNcols() and operator()(row, col).
 * @param text Label printed in front of the matrix expression.
 */
#define TMVA_DNN_PrintTCpuMatrix(mat, text)                                                                      \
   {                                                                                                             \
      auto &&_mat = (mat);                                                                                       \
      auto _dpointer = _mat.GetRawDataPointer();                                                                 \
      if (_dpointer == nullptr) {                                                                                \
         std::cout << #mat << " is null pointer" << std::endl;                                                   \
         exit(1);                                                                                                \
      }                                                                                                          \
      auto _nrows = _mat.GetNrows();                                                                             \
      auto _ncols = _mat.GetNcols();                                                                             \
      std::cout << "---------------------" << text << " " << #mat << "(" << _nrows << "," << _ncols << ")"       \
                << "--------------------" << std::endl;                                                          \
      for (size_t _i = 0; _i < _nrows; _i++) {                                                                   \
         for (size_t _j = 0; _j < _ncols; _j++) {                                                                \
            std::cout << _mat(_i, _j);                                                                           \
            if (_j < _ncols - 1)                                                                                 \
               std::cout << ",";                                                                                 \
         }                                                                                                       \
         std::cout << std::endl;                                                                                 \
      }                                                                                                          \
   }
65#else
66#define TMVA_DNN_PrintTCpuMatrix(mat, text)
67#endif
68
69namespace TMVA {
70namespace DNN {
71
72/** The TCpuMatrix class.
73 *
74 * Matrix class for multi-threaded CPU architectures. Uses the TCpuBuffer
75 * class to store the matrices in column-major format for compatibility with
76 * BLAS. Provides Map and MapFrom member functions to simplify the application of
77 * activation functions and derivatives to matrices.
78 *
79 * Copying and assignment of TCpuMatrix objects only performs shallow copies, i.e.
80 * copying is fast and the resulting objects share the element data.
81 *
82 * \tparam AFloat The floating point type used to represent the matrix elements.
83 */
84//______________________________________________________________________________
85template <typename AFloat>
87private:
88 static std::vector<AFloat> fOnes; ///< Vector filled with ones used for BLAS calls.
89
90public:
91 TCpuBuffer<AFloat> fBuffer; ///< The buffer holding the matrix elements
92 ///< in column-major format.
93private:
94 size_t fNCols;
95 size_t fNRows;
96
97public:
98 // friend class TCpuTensor<AFloat>;
99
100 /** Returns pointer to a vector holding only ones with a guaranteed length
101 * of the number of columns of every instantiated CpuMatrix object. */
102
103
105 const TCpuBuffer<AFloat>& GetBuffer() const {return fBuffer;}
106 // for compatible API with Tensor and Matrix in Cuda
108 const TCpuBuffer<AFloat> &GetDeviceBuffer() const { return fBuffer; }
109
110 static const AFloat *GetOnePointer() { return fOnes.data(); }
111
112 static size_t GetOnePointerSize() { return fOnes.size(); }
113
114 static void InitializeOneVector(size_t n);
115
117
118 /** Construct matrix and allocate space for its elements. */
119 TCpuMatrix(size_t nRows, size_t nCols);
120 /** Construct a TCpuMatrix object by (deeply) copying from a
121 * TMatrixT<Double_t> matrix. */
123 /** Construct an m-by-n matrix from the given buffer. The size of the
124 * buffer must match the requested dimensions. */
125 TCpuMatrix(const TCpuBuffer<AFloat> &buffer, size_t m, size_t n);
126
127 /** Copy from a TMatrixT. Deep copy without creating a new buffer. */
129
130 // N.B the default copy constructor does a shallow copy (NOT a deep one) !
131 TCpuMatrix(const TCpuMatrix &) = default;
132 TCpuMatrix(TCpuMatrix &&) = default;
133 TCpuMatrix &operator=(const TCpuMatrix &) = default;
135 ~TCpuMatrix() = default;
136
137 /** Clear content of the matrix and initialize to zero elements
138 */
139 void Zero();
140
141 /** Convert to a TMatrixT<AFloat> object. Performs a deep copy of the matrix
142 * elements. */
143 operator TMatrixT<AFloat>() const;
144
145 /** Map the given function over the matrix elements. Executed in parallel
146 * using TThreadExecutor. */
147 template <typename Function_t>
148 void Map(Function_t &f);
149
150 /** Same as Map but takes the input values from the matrix \p A and writes
151 * the results in this matrix. */
152 template <typename Function_t>
153 void MapFrom(Function_t &f, const TCpuMatrix &A);
154
155 size_t GetNrows() const { return fNRows; }
156 size_t GetNcols() const { return fNCols; }
157 size_t GetNoElements() const { return fNRows * fNCols; }
158 size_t GetSize() const { return fNRows * fNCols; }
159
160 /** Return matrix element in row \p i and column \p j. */
161 AFloat operator()(size_t i, size_t j) const { return fBuffer[j * fNRows + i]; }
162 AFloat &operator()(size_t i, size_t j) { return fBuffer[j * fNRows + i]; }
163
164 /** Return raw pointer to the elements stored contiguously in column-major
165 * order. */
166 AFloat *GetRawDataPointer() { return fBuffer; }
167 const AFloat *GetRawDataPointer() const { return fBuffer; }
168
170
171 // static function to get the number of elements for task
172 static size_t GetNWorkItems(size_t nelements);
173
174 // print matrix
175 void Print() const
176 {
177 TCpuMatrix cpuMatrix = *this;
178 TMVA_DNN_PrintTCpuMatrix(cpuMatrix, "CpuMatrix");
179 }
180
181private:
182 void Initialize();
183};
184
185template <typename AFloat>
186std::vector<AFloat> TCpuMatrix<AFloat>::fOnes{};
187
188// Inline Functions.
189//______________________________________________________________________________
190template <typename AFloat>
191size_t TCpuMatrix<AFloat>::GetNWorkItems(size_t nElements)
192{
193 // nElements should have at least 100
194 // const size_t nWorkers = TMVA::Config::Instance().GetNCpu();
195 // return (nElements > nWorkers) ? (int) nElements/nWorkers : 1;
196 const size_t minElements = 1000;
197 const size_t nCpu = TMVA::Config::Instance().GetNCpu();
198 if (nElements <= minElements)
199 return nElements;
200 if (nElements < nCpu * minElements) {
201 size_t nt = nElements / minElements;
202 return nElements / nt;
203 }
204 return nElements / nCpu;
205 // if (nElements < nCpu*20) return nElements/nCpu;
206 // return nElements/(nCpu*10);
207}
208
209//______________________________________________________________________________
210template <typename AFloat>
211template <typename Function_t>
212inline void TCpuMatrix<AFloat>::Map(Function_t &f)
213{
214 AFloat *data = GetRawDataPointer();
215 size_t nelements = GetNoElements();
216 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
217
218 auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
219 size_t jMax = std::min(workerID + nsteps, nelements);
220 for (size_t j = workerID; j < jMax; ++j) {
221 data[j] = f(data[j]);
222 }
223 return 0;
224 };
225
226 if (nsteps < nelements) {
227 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
228
229 // for (size_t i = 0; i < nelements; i+=nsteps)
230 // ff(i);
231
232 } else {
233 R__ASSERT(nelements == nsteps);
234 ff(0);
235 }
236}
237
238//______________________________________________________________________________
239template <typename AFloat>
240template <typename Function_t>
241inline void TCpuMatrix<AFloat>::MapFrom(Function_t &f, const TCpuMatrix &A)
242{
243 AFloat *dataB = GetRawDataPointer();
244 const AFloat *dataA = A.GetRawDataPointer();
245
246 size_t nelements = GetNoElements();
247 R__ASSERT(nelements == A.GetNoElements());
248 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
249
250 auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
251 size_t jMax = std::min(workerID + nsteps, nelements);
252 for (size_t j = workerID; j < jMax; ++j) {
253 dataB[j] = f(dataA[j]);
254 }
255 return 0;
256 };
257 if (nsteps < nelements) {
258 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
259 // for (size_t i = 0; i < nelements; i+=nsteps)
260 // ff(i);
261
262 } else {
263 R__ASSERT(nelements == nsteps);
264 ff(0);
265 }
266}
267//______________________________________________________________________________
268template <typename AFloat>
270{
271 for (size_t j = 0; j < fNCols; j++) {
272 for (size_t i = 0; i < fNRows; i++) {
273 (*this)(i, j) = 0;
274 }
275 }
276}
277
278} // namespace DNN
279} // namespace TMVA
280
281#endif
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
Definition: CpuMatrix.h:66
#define f(i)
Definition: RSha256.hxx:104
#define R__ASSERT(e)
Definition: TError.h:96
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition: Config.h:83
UInt_t GetNCpu()
Definition: Config.h:72
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:107
The TCpuMatrix class.
Definition: CpuMatrix.h:86
TCpuMatrix(TCpuMatrix &&)=default
static std::vector< AFloat > fOnes
Vector filled with ones used for BLAS calls.
Definition: CpuMatrix.h:88
TCpuMatrix & operator=(const TCpuMatrix &)=default
TCpuMatrix(const TCpuBuffer< AFloat > &buffer, size_t m, size_t n)
Construct a m-times-n matrix from the given buffer.
size_t GetNcols() const
Definition: CpuMatrix.h:156
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as maps but takes the input values from the matrix A and writes the results in this matrix.
Definition: CpuMatrix.h:241
static size_t GetOnePointerSize()
Definition: CpuMatrix.h:112
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:269
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:166
AFloat & operator()(size_t i, size_t j)
Definition: CpuMatrix.h:162
void Print() const
Definition: CpuMatrix.h:175
static const AFloat * GetOnePointer()
Definition: CpuMatrix.h:110
TCpuMatrix(const TMatrixT< AFloat > &)
Construct a TCpuMatrix object by (deeply) copying from a TMatrixT<Double_t> matrix.
AFloat operator()(size_t i, size_t j) const
Return matrix element in row i and column j.
Definition: CpuMatrix.h:161
const TCpuBuffer< AFloat > & GetDeviceBuffer() const
Definition: CpuMatrix.h:108
const AFloat * GetRawDataPointer() const
Definition: CpuMatrix.h:167
const TCpuBuffer< AFloat > & GetBuffer() const
Definition: CpuMatrix.h:105
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:191
size_t GetSize() const
Definition: CpuMatrix.h:158
TCpuBuffer< AFloat > & GetDeviceBuffer()
Definition: CpuMatrix.h:107
size_t GetNrows() const
Definition: CpuMatrix.h:155
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:98
TCpuMatrix & operator=(TCpuMatrix &&)=default
TCpuBuffer< AFloat > fBuffer
The buffer holding the matrix elements in column-major format.
Definition: CpuMatrix.h:91
TCpuBuffer< AFloat > & GetBuffer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
Definition: CpuMatrix.h:104
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition: CpuMatrix.h:212
static Executor & GetThreadExecutor()
Definition: CpuMatrix.h:169
TCpuMatrix< AFloat > & operator=(const TMatrixT< AFloat > &)
copy from a TMAtrixT .
size_t GetNoElements() const
Definition: CpuMatrix.h:157
TCpuMatrix(const TCpuMatrix &)=default
Base Excutor class.
Definition: Executor.h:35
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:110
TMatrixT.
Definition: TMatrixT.h:39
const Int_t n
Definition: legend1.C:16
static double A[]
create variable transformations
auto * m
Definition: textangle.C:8