17 #ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX 18 #define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX 29 #if defined(DEBUG_TMVA_TCPUMATRIX) 30 #define PrintMatrix(mat, text) \ 32 auto _dpointer = mat.GetRawDataPointer(); \ 33 if (_dpointer == NULL) { \ 34 std::cout << #mat << " is null pointer" << std::endl; \ 37 auto _nrows = mat.GetNrows(); \ 38 auto _ncols = mat.GetNcols(); \ 39 std::cout << "---------------------" << text << " " << #mat << "(" << _nrows << "," << _ncols << ")" \ 40 << "--------------------" << std::endl; \ 41 for (size_t _i = 0; _i < _nrows; _i++) { \ 42 for (size_t _j = 0; _j < _ncols; _j++) { \ 43 std::cout << mat(_i, _j); \ 44 if (_j < _ncols - 1) std::cout << ","; \ 46 std::cout << std::endl; \ 50 #define PrintMatrix(mat, text) 71 template<
typename AFloat>
75 static std::vector<AFloat>
fOnes;
118 template <
typename Function_t>
119 void Map(Function_t &
f);
123 template <
typename Function_t>
131 AFloat
operator()(
size_t i,
size_t j)
const {
return fBuffer[j * fNRows + i];}
132 AFloat &
operator()(
size_t i,
size_t j) {
return fBuffer[j * fNRows + i];}
157 template<
typename AFloat>
163 template<
typename AFloat>
169 if (nElements <= nCpu)
return 1;
170 if (nElements < nCpu*20)
return nElements/nCpu;
171 return nElements/(nCpu*10);
176 template<
typename AFloat>
177 template<
typename Function_t>
184 auto ff = [
data, &nsteps, &nelements, &
f](
UInt_t workerID)
186 for (
size_t j = 0; j < nsteps; ++j) {
187 size_t idx = workerID+j;
188 if (idx >= nelements)
break;
189 data[idx] =
f(data[idx]);
197 for (
size_t i = 0; i < nelements; i+=nsteps)
203 template<
typename AFloat>
204 template<
typename Function_t>
214 auto ff = [&dataB, &dataA, &nsteps, &nelements, &
f](
UInt_t workerID)
216 for (
size_t j = 0; j < nsteps; ++j) {
217 size_t idx = workerID+j;
218 if (idx >= nelements)
break;
219 dataB[idx] =
f(dataA[idx]);
226 for (
size_t i = 0; i < nelements; i+=nsteps)
232 template<
typename AFloat>
235 for (
size_t j = 0; j <
fNCols; j++) {
236 for (
size_t i = 0; i <
fNRows; i++) {
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
AFloat operator()(size_t i, size_t j) const
Return matrix element in row i and column j.
TCpuBuffer< AFloat > fBuffer
The buffer holding the matrix elements in column-major format.
static Config & Instance()
static function: returns TMVA instance
static void InitializeOneVector(size_t n)
static size_t GetNWorkItems(size_t nelements)
#define PrintMatrix(mat, text)
AFloat & operator()(size_t i, size_t j)
size_t GetNElements() const
static std::vector< AFloat > fOnes
Vector filled with ones used for BLAS calls.
ROOT::TThreadExecutor & GetThreadExecutor()
This class provides a simple interface to execute the same task multiple times in parallel...
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as Map, but takes the input values from the matrix A and writes the results into this matrix...
static size_t GetOnePointerSize()
static ROOT::TThreadExecutor & GetThreadExecutor()
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
TCpuMatrix & operator=(const TCpuMatrix &)=default
A pseudo container class which is a generator of indices.
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
TCpuMatrix(size_t nRows, size_t nCols)
Construct matrix and allocate space for its elements.
void Map(Function_t &f)
Map the given function over the matrix elements.
void Zero()
Clear content of the matrix and initialize to zero elements.
const AFloat * GetRawDataPointer() const
Abstract ClassifierFactory template that handles arbitrary types.