Logo ROOT   6.16/01
Reference Guide
CpuMatrix.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12//////////////////////////////////////////////////////////
13// Definition of the CpuMatrix class used to represent //
14// weight and bias matrices in neural nets. //
15//////////////////////////////////////////////////////////
16
17#ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
18#define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
19
20#ifdef R__USE_IMT
21#define DL_USE_MTE // use MT with tbb
22#endif
23
24#include <cstddef>
25#include <vector>
26
27#include "TMatrix.h"
28#include "TMVA/Config.h"
29#include "CpuBuffer.h"
30#include <TMVA/Config.h>
31
32// #define DEBUG_TMVA_TCPUMATRIX
33#if defined(DEBUG_TMVA_TCPUMATRIX)
/*
 * Debug(!) function for printing matrices.
 *
 * Writes a header line containing `text`, the stringified argument
 * expression (`#mat`) and the matrix dimensions to std::cout, followed by
 * one line per matrix row with the elements separated by commas.
 *
 * If `mat.GetRawDataPointer()` yields a null pointer, "<expression> is null
 * pointer" is printed and the program is terminated with `exit(1)`. E.g.
 * `TMVA_DNN_PrintTCpuMatrix(matA, "Test")` _could_ generate
 * "matA is null pointer".
 *
 * Note: This is a preprocessor macro. It does _not_ respect namespaces.
 * The matrix expression is evaluated exactly once and bound to a local
 * reference, so arbitrary expressions such as `*ptr` are safe to pass.
 * The macro uses std::cout and exit(); the code expanding it has to make
 * <iostream> and <cstdlib> available.
 *
 * @param mat  Matrix to print (needs GetRawDataPointer(), GetNrows(),
 *             GetNcols() and operator()(row, column))
 * @param text Name of matrix
 */
#define TMVA_DNN_PrintTCpuMatrix(mat, text)                                                      \
{                                                                                                \
   auto &&_tcpumat = (mat);                                                                      \
   auto _dpointer = _tcpumat.GetRawDataPointer();                                                \
   if (_dpointer == nullptr) {                                                                   \
      std::cout << #mat << " is null pointer" << std::endl;                                      \
      exit(1);                                                                                   \
   }                                                                                             \
   auto _nrows = _tcpumat.GetNrows();                                                            \
   auto _ncols = _tcpumat.GetNcols();                                                            \
   std::cout << "---------------------" << (text) << " " << #mat << "(" << _nrows << ","         \
             << _ncols << ")"                                                                    \
             << "--------------------" << std::endl;                                             \
   for (size_t _i = 0; _i < _nrows; _i++) {                                                      \
      for (size_t _j = 0; _j < _ncols; _j++) {                                                   \
         std::cout << _tcpumat(_i, _j);                                                          \
         /* separator between elements, but not after the last one of a row */                   \
         if (_j + 1 < _ncols) std::cout << ",";                                                  \
      }                                                                                          \
      std::cout << std::endl;                                                                    \
   }                                                                                             \
}
65#else
66#define TMVA_DNN_PrintTCpuMatrix(mat, text)
67#endif
68
69namespace TMVA
70{
71namespace DNN
72{
73
74/** The TCpuMatrix class.
75 *
76 * Matrix class for multi-threaded CPU architectures. Uses the TCpuBuffer
77 * class to store the matrices in column-major format for compatibility with
78 * BLAS. Provides Map and MapFrom member functions to simplify the application of
79 * activation functions and derivatives to matrices.
80 *
81 * Copying and assignment of TCpuMatrix objects only performs shallow copies, i.e.
82 * copying is fast and the resulting objects share the element data.
83 *
84 * \tparam AFloat The floating point type used to represent the matrix elements.
85 */
86//______________________________________________________________________________
87template<typename AFloat>
89{
90private:
91 static std::vector<AFloat> fOnes; ///< Vector filled with ones used for BLAS calls.
92
93 TCpuBuffer<AFloat> fBuffer; ///< The buffer holding the matrix elements
94 ///< in column-major format.
95 size_t fNCols;
96 size_t fNRows;
97
98public:
99
100 /** Returns pointer to a vector holding only ones with a guaranteed length
101 * of the number of columns of every instantiated CpuMatrix object. */
102 static const AFloat * GetOnePointer() {return fOnes.data();}
103
104 static size_t GetOnePointerSize() { return fOnes.size(); }
105
106 static void InitializeOneVector( size_t n);
107
108 /** Construct matrix and allocate space for its elements. */
109 TCpuMatrix(size_t nRows, size_t nCols);
110 /** Construct a TCpuMatrix object by (deeply) copying from a
111 * TMatrixT<Double_t> matrix. */
113 /** Construct a m-times-n matrix from the given buffer. The size must of
114 * course match. */
115 TCpuMatrix(const TCpuBuffer<AFloat> &buffer, size_t m, size_t n);
116
117 //N.B the default copy constructor does a shallow copy (NOT a deep one) !
118 TCpuMatrix(const TCpuMatrix &) = default;
119 TCpuMatrix( TCpuMatrix &&) = default;
120 TCpuMatrix & operator=(const TCpuMatrix &) = default;
122 ~TCpuMatrix() = default;
123
124 /** Clear content of the matrix and initialize to zero elements
125 */
126 void Zero();
127
128 /** Convert to a TMatrixT<AFloat_t> object. Performs a deep copy of the matrix
129 * elements. */
130 operator TMatrixT<AFloat>() const;
131
132 /** Map the given function over the matrix elements. Executed in parallel
133 * using TThreadExecutor. */
134 template <typename Function_t>
135 void Map(Function_t &f);
136
137 /** Same as maps but takes the input values from the matrix \p A and writes
138 * the results in this matrix. */
139 template <typename Function_t>
140 void MapFrom(Function_t &f, const TCpuMatrix & A);
141
142 size_t GetNrows() const {return fNRows;}
143 size_t GetNcols() const {return fNCols;}
144 size_t GetNoElements() const {return fNRows * fNCols;}
145
146 /** Return matrix element in row \p i and column \p j. */
147 AFloat operator()(size_t i, size_t j) const {return fBuffer[j * fNRows + i];}
148 AFloat & operator()(size_t i, size_t j) {return fBuffer[j * fNRows + i];}
149
150 /** Return raw pointer to the elements stored contiguously in column-major
151 * order. */
152 AFloat * GetRawDataPointer() {return fBuffer;}
153 const AFloat * GetRawDataPointer() const {return fBuffer;}
154
156
157 // static function to get the number of elements for task
158 static size_t GetNWorkItems(size_t nelements);
159
160 // print matrix
161 void Print() const {
162 TCpuMatrix cpuMatrix = *this;
163 TMVA_DNN_PrintTCpuMatrix(cpuMatrix,"CpuMatrix");
164 }
165
166
167private:
168
169 void Initialize();
170
171};
172
173template<typename AFloat>
174std::vector<AFloat> TCpuMatrix<AFloat>::fOnes {};
175
176
177// Inline Functions.
178//______________________________________________________________________________
179template<typename AFloat>
180size_t TCpuMatrix<AFloat>::GetNWorkItems(size_t nElements)
181{
182 // nElements should have at least 100
183 // const size_t nWorkers = TMVA::Config::Instance().GetNCpu();
184 // return (nElements > nWorkers) ? (int) nElements/nWorkers : 1;
185 const size_t minElements = 1000;
186 const size_t nCpu = TMVA::Config::Instance().GetNCpu();
187 if (nElements <= minElements) return nElements;
188 if (nElements < nCpu*minElements) {
189 size_t nt = nElements/minElements;
190 return nElements/nt;
191 }
192 return nElements/nCpu;
193 // if (nElements < nCpu*20) return nElements/nCpu;
194 // return nElements/(nCpu*10);
195}
196
197
198//______________________________________________________________________________
199template<typename AFloat>
200template<typename Function_t>
201inline void TCpuMatrix<AFloat>::Map(Function_t &f)
202{
203 AFloat *data = GetRawDataPointer();
204 size_t nelements = GetNoElements();
205 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
206
207 auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID)
208 {
209 size_t jMax = std::min(workerID+nsteps,nelements);
210 for (size_t j = workerID; j < jMax; ++j) {
211 data[j] = f(data[j]);
212 }
213 return 0;
214 };
215
216 if (nsteps < nelements) {
217#ifdef DL_USE_MTE
218 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0,nelements,nsteps));
219#else
220 for (size_t i = 0; i < nelements; i+=nsteps)
221 ff(i);
222#endif
223 }
224 else {
225 R__ASSERT(nelements == nsteps);
226 ff(0);
227 }
228}
229
230//______________________________________________________________________________
231template<typename AFloat>
232template<typename Function_t>
233inline void TCpuMatrix<AFloat>::MapFrom(Function_t &f, const TCpuMatrix &A)
234{
235 AFloat *dataB = GetRawDataPointer();
236 const AFloat *dataA = A.GetRawDataPointer();
237
238 size_t nelements = GetNoElements();
239 R__ASSERT(nelements == A.GetNoElements() );
240 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
241
242 auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID)
243 {
244 size_t jMax = std::min(workerID+nsteps,nelements);
245 for (size_t j = workerID; j < jMax; ++j) {
246 dataB[j] = f(dataA[j]);
247 }
248 return 0;
249 };
250 if (nsteps < nelements) {
251#ifdef DL_USE_MTE
252 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0,nelements,nsteps));
253#else
254 for (size_t i = 0; i < nelements; i+=nsteps)
255 ff(i);
256#endif
257 }
258 else {
259 R__ASSERT(nelements == nsteps);
260 ff(0);
261 }
262}
263//______________________________________________________________________________
264template<typename AFloat>
266{
267 for (size_t j = 0; j < fNCols; j++) {
268 for (size_t i = 0; i < fNRows; i++) {
269 (*this)(i, j) = 0;
270 }
271 }
272}
273
274
275} // namespace DNN
276} // namespace TMVA
277
278#endif
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
Definition: CpuMatrix.h:66
#define f(i)
Definition: RSha256.hxx:104
unsigned int UInt_t
Definition: RtypesCore.h:42
#define R__ASSERT(e)
Definition: TError.h:96
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
This class provides a simple interface to execute the same task multiple times in parallel,...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
UInt_t GetNCpu()
Definition: Config.h:76
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:108
ROOT::TThreadExecutor & GetThreadExecutor()
Definition: Config.h:82
The TCpuMatrix class.
Definition: CpuMatrix.h:89
TCpuMatrix(TCpuMatrix &&)=default
static std::vector< AFloat > fOnes
Vector filled with ones used for BLAS calls.
Definition: CpuMatrix.h:91
TCpuMatrix & operator=(const TCpuMatrix &)=default
TCpuMatrix(const TCpuBuffer< AFloat > &buffer, size_t m, size_t n)
Construct a m-times-n matrix from the given buffer.
size_t GetNcols() const
Definition: CpuMatrix.h:143
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as maps but takes the input values from the matrix A and writes the results in this matrix.
Definition: CpuMatrix.h:233
TCpuMatrix(size_t nRows, size_t nCols)
Construct matrix and allocate space for its elements.
Definition: CpuMatrix.cxx:23
static size_t GetOnePointerSize()
Definition: CpuMatrix.h:104
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:265
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:152
AFloat & operator()(size_t i, size_t j)
Definition: CpuMatrix.h:148
void Print() const
Definition: CpuMatrix.h:161
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
Definition: CpuMatrix.h:102
TCpuMatrix(const TMatrixT< AFloat > &)
Construct a TCpuMatrix object by (deeply) copying from a TMatrixT<Double_t> matrix.
AFloat operator()(size_t i, size_t j) const
Return matrix element in row i and column j.
Definition: CpuMatrix.h:147
const AFloat * GetRawDataPointer() const
Definition: CpuMatrix.h:153
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:180
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:155
size_t GetNrows() const
Definition: CpuMatrix.h:142
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:87
TCpuMatrix & operator=(TCpuMatrix &&)=default
TCpuBuffer< AFloat > fBuffer
The buffer holding the matrix elements in column-major format.
Definition: CpuMatrix.h:93
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition: CpuMatrix.h:201
size_t GetNoElements() const
Definition: CpuMatrix.h:144
TCpuMatrix(const TCpuMatrix &)=default
TMatrixT.
Definition: TMatrixT.h:39
const Int_t n
Definition: legend1.C:16
static double A[]
Abstract ClassifierFactory template that handles arbitrary types.
auto * m
Definition: textangle.C:8