17#ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
18#define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
33#if defined(DEBUG_TMVA_TCPUMATRIX)
46#define TMVA_DNN_PrintTCpuMatrix(mat, text) \
48 auto _dpointer = mat.GetRawDataPointer(); \
49 if (_dpointer == NULL) { \
50 std::cout << #mat << " is null pointer" << std::endl; \
53 auto _nrows = mat.GetNrows(); \
54 auto _ncols = mat.GetNcols(); \
55 std::cout << "---------------------" << text << " " << #mat << "(" << _nrows << "," << _ncols << ")"\
56 << "--------------------" << std::endl; \
57 for (size_t _i = 0; _i < _nrows; _i++) { \
58 for (size_t _j = 0; _j < _ncols; _j++) { \
59 std::cout << mat(_i, _j); \
60 if (_j < _ncols - 1) std::cout << ","; \
62 std::cout << std::endl; \
66#define TMVA_DNN_PrintTCpuMatrix(mat, text)
87template<
typename AFloat>
91 static std::vector<AFloat>
fOnes;
134 template <
typename Function_t>
135 void Map(Function_t &
f);
139 template <
typename Function_t>
173template<
typename AFloat>
179template<
typename AFloat>
185 const size_t minElements = 1000;
187 if (nElements <= minElements)
return nElements;
188 if (nElements < nCpu*minElements) {
189 size_t nt = nElements/minElements;
192 return nElements/nCpu;
199template<
typename AFloat>
200template<
typename Function_t>
203 AFloat *
data = GetRawDataPointer();
204 size_t nelements = GetNoElements();
207 auto ff = [
data, &nsteps, &nelements, &
f](
UInt_t workerID)
209 size_t jMax = std::min(workerID+nsteps,nelements);
210 for (
size_t j = workerID; j < jMax; ++j) {
216 if (nsteps < nelements) {
220 for (
size_t i = 0; i < nelements; i+=nsteps)
231template<
typename AFloat>
232template<
typename Function_t>
235 AFloat *dataB = GetRawDataPointer();
236 const AFloat *dataA =
A.GetRawDataPointer();
238 size_t nelements = GetNoElements();
242 auto ff = [&dataB, &dataA, &nsteps, &nelements, &
f](
UInt_t workerID)
244 size_t jMax = std::min(workerID+nsteps,nelements);
245 for (
size_t j = workerID; j < jMax; ++j) {
246 dataB[j] =
f(dataA[j]);
250 if (nsteps < nelements) {
254 for (
size_t i = 0; i < nelements; i+=nsteps)
264template<
typename AFloat>
267 for (
size_t j = 0; j < fNCols; j++) {
268 for (
size_t i = 0; i < fNRows; i++) {
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
A pseudo container class which is a generator of indices.
This class provides a simple interface to execute the same task multiple times in parallel,...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
static Config & Instance()
Static function: returns the TMVA Config instance.
ROOT::TThreadExecutor & GetThreadExecutor()
TCpuMatrix(TCpuMatrix &&)=default
static std::vector< AFloat > fOnes
Vector filled with ones used for BLAS calls.
TCpuMatrix & operator=(const TCpuMatrix &)=default
TCpuMatrix(const TCpuBuffer< AFloat > &buffer, size_t m, size_t n)
Construct an m-by-n matrix from the given buffer.
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as Map, but takes the input values from the matrix A and writes the results into this matrix.
TCpuMatrix(size_t nRows, size_t nCols)
Construct matrix and allocate space for its elements.
static size_t GetOnePointerSize()
void Zero()
Clear content of the matrix and initialize to zero elements.
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
AFloat & operator()(size_t i, size_t j)
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
TCpuMatrix(const TMatrixT< AFloat > &)
Construct a TCpuMatrix object by (deeply) copying from a TMatrixT<AFloat> matrix.
AFloat operator()(size_t i, size_t j) const
Return matrix element in row i and column j.
const AFloat * GetRawDataPointer() const
static size_t GetNWorkItems(size_t nelements)
static ROOT::TThreadExecutor & GetThreadExecutor()
static void InitializeOneVector(size_t n)
TCpuMatrix & operator=(TCpuMatrix &&)=default
TCpuBuffer< AFloat > fBuffer
The buffer holding the matrix elements in column-major format.
void Map(Function_t &f)
Map the given function over the matrix elements.
size_t GetNoElements() const
TCpuMatrix(const TCpuMatrix &)=default
Abstract ClassifierFactory template that handles arbitrary types.