doc/v620/CpuMatrix_8h_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 20/07/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


//////////////////////////////////////////////////////////

// Definition of the CpuMatrix class used to represent  //

// weight and bias matrices in neural nets.             //

//////////////////////////////////////////////////////////


#ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX

#define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX


#ifdef R__USE_IMT

#define DL_USE_MTE // use MT with tbb

#endif


#include <cstddef>

#include <vector>


#include "TMatrix.h"

#include "TMVA/Config.h"

#include "CpuBuffer.h"

#include <TMVA/Config.h>


// #define DEBUG_TMVA_TCPUMATRIX

#if defined(DEBUG_TMVA_TCPUMATRIX)

/*
 * Debug(!) macro for printing matrices. Only active when
 * DEBUG_TMVA_TCPUMATRIX is defined; otherwise it expands to nothing.
 *
 * The expression passed as `mat` is stringified by the preprocessor (`#mat`)
 * and printed, together with `text` and the matrix dimensions, in a header
 * line. The elements follow, one matrix row per output line, separated by
 * commas.
 *
 * E.g. `TMVA_DNN_PrintTCpuMatrix(matA, "Test")` prints "matA is null pointer"
 * and then calls exit(1) when `matA.GetRawDataPointer()` returns a null
 * pointer.
 *
 * Notes:
 *  - This is a preprocessor macro. It does _not_ respect namespaces.
 *  - `mat` is expanded several times, so it should be a plain object
 *    expression without side effects.
 *  - The expansion uses std::cout, std::endl and exit; this header does not
 *    include <iostream> or <cstdlib> directly, so they must be available at
 *    the point of use.
 *
 * @param mat  Matrix to print (must provide GetRawDataPointer(), GetNrows(),
 *             GetNcols() and operator()(i, j))
 * @param text Label printed in front of the stringified matrix expression
 */

#define TMVA_DNN_PrintTCpuMatrix(mat, text)                                                                \

   {                                                                                                       \

      auto _dpointer = mat.GetRawDataPointer();                                                            \

      if (_dpointer == NULL) {                                                                             \

         std::cout << #mat << " is null pointer" << std::endl;                                             \

         exit(1);                                                                                          \

      }                                                                                                    \

      auto _nrows = mat.GetNrows();                                                                        \

      auto _ncols = mat.GetNcols();                                                                        \

      std::cout << "---------------------" << text << " " << #mat << "(" << _nrows << "," << _ncols << ")" \

                << "--------------------" << std::endl;                                                    \

      for (size_t _i = 0; _i < _nrows; _i++) {                                                             \

         for (size_t _j = 0; _j < _ncols; _j++) {                                                          \

            std::cout << mat(_i, _j);                                                                      \

            if (_j < _ncols - 1)                                                                           \

               std::cout << ",";                                                                           \

         }                                                                                                 \

         std::cout << std::endl;                                                                           \

      }                                                                                                    \

   }

#else

// Non-debug builds: the macro expands to nothing, so its arguments are never evaluated.
#define TMVA_DNN_PrintTCpuMatrix(mat, text)

#endif


namespace TMVA {

namespace DNN {


/** The TCpuMatrix class.

 *

 * Matrix class for multi-threaded CPU architectures. Uses the TCpuBuffer

 * class to store the matrices in column-major format for compatibility with

 * BLAS. Provides Map and MapFrom member functions to simplify the application of

 * activation functions and derivatives to matrices.

 *

 * Copying and assignment of TCpuMatrix objects only performs shallow copies, i.e.

 * copying is fast and the resulting objects share the element data.

 *

 * \tparam AFloat The floating point type used to represent the matrix elements.

 */

//______________________________________________________________________________

template <typename AFloat>

class TCpuMatrix {

private:

   static std::vector<AFloat> fOnes; ///< Vector filled with ones used for BLAS calls.


public:

   TCpuBuffer<AFloat> fBuffer; ///< The buffer holding the matrix elements

                               ///< in column-major format.

private:

   size_t fNCols; ///< Number of columns.

   size_t fNRows; ///< Number of rows.


public:

   // friend class TCpuTensor<AFloat>;


   /** Return a reference to the buffer holding the matrix elements. */

   TCpuBuffer<AFloat>& GetBuffer() {return fBuffer;}

   /** Return a read-only reference to the buffer holding the matrix elements. */
   const TCpuBuffer<AFloat>& GetBuffer() const {return fBuffer;}


   /** Returns pointer to a vector holding only ones with a guaranteed length
    *  of the number of columns of every instantiated CpuMatrix object. */
   static const AFloat *GetOnePointer() { return fOnes.data(); }


   /** Return the current number of elements of the vector of ones. */
   static size_t GetOnePointerSize() { return fOnes.size(); }


   /** Set up the static vector of ones. Defined outside of this header;
    *  presumably makes the vector hold at least \p n ones -- confirm against
    *  the implementation file. */
   static void InitializeOneVector(size_t n);


   /** Construct an empty matrix with zero rows and zero columns; the buffer
    *  is default-constructed. */
   TCpuMatrix() : fNCols(0), fNRows(0) {}


   /** Construct matrix and allocate space for its elements. */

   TCpuMatrix(size_t nRows, size_t nCols);

   /** Construct a TCpuMatrix object by (deeply) copying from a

    *  TMatrixT<AFloat> matrix. */

   TCpuMatrix(const TMatrixT<AFloat> &);

   /** Construct a m-times-n matrix from the given buffer. The size must of

    *  course match. */

   TCpuMatrix(const TCpuBuffer<AFloat> &buffer, size_t m, size_t n);


   // N.B the default copy constructor does a shallow copy (NOT a deep one) !
   // The same holds for the defaulted assignment and move operations below.

   TCpuMatrix(const TCpuMatrix &) = default;

   TCpuMatrix(TCpuMatrix &&) = default;

   TCpuMatrix &operator=(const TCpuMatrix &) = default;

   TCpuMatrix &operator=(TCpuMatrix &&) = default;

   ~TCpuMatrix() = default;


   /** Set all elements of the matrix to zero. The dimensions are unchanged.

    */

   void Zero();


   /** Convert to a TMatrixT<AFloat> object. Performs a deep copy of the matrix

    *  elements. */

   operator TMatrixT<AFloat>() const;


   /** Map the given function over the matrix elements, in place. When the

    *  work is split into several chunks (see GetNWorkItems) the chunks are
    *  dispatched through the TMVA thread executor; otherwise all elements are
    *  processed directly in the calling thread. \p f is taken by non-const
    *  lvalue reference, so a temporary cannot be passed. */

   template <typename Function_t>

   void Map(Function_t &f);


   /** Same as Map but takes the input values from the matrix \p A and writes

    *  the results in this matrix. Asserts that \p A has the same number of
    *  elements as this matrix. */

   template <typename Function_t>

   void MapFrom(Function_t &f, const TCpuMatrix &A);


   /** Number of rows. */
   size_t GetNrows() const { return fNRows; }

   /** Number of columns. */
   size_t GetNcols() const { return fNCols; }

   /** Total number of elements (rows times columns). */
   size_t GetNoElements() const { return fNRows * fNCols; }

   /** Same value as GetNoElements(). */
   size_t GetSize() const { return fNRows * fNCols; }


   /** Return matrix element in row \p i and column \p j. Elements are
    *  addressed in column-major order (index j * nRows + i); no bounds check
    *  is performed here. */

   AFloat operator()(size_t i, size_t j) const { return fBuffer[j * fNRows + i]; }

   AFloat &operator()(size_t i, size_t j) { return fBuffer[j * fNRows + i]; }


   /** Return raw pointer to the elements stored contiguously in column-major

    *  order. The pointer is obtained by converting fBuffer to a pointer. */

   AFloat *GetRawDataPointer() { return fBuffer; }

   const AFloat *GetRawDataPointer() const { return fBuffer; }


   /** Return the executor provided by the global TMVA configuration
    *  (TMVA::Config::Instance().GetThreadExecutor()). */
   static Executor &GetThreadExecutor() { return TMVA::Config::Instance().GetThreadExecutor(); }


   /** Static function returning the number of consecutive elements that one
    *  task should process when \p nelements elements are split up for Map and
    *  MapFrom. */

   static size_t GetNWorkItems(size_t nelements);


   /** Print the matrix through the TMVA_DNN_PrintTCpuMatrix debug macro,
    *  working on a copy of this object. The macro expands to nothing unless
    *  DEBUG_TMVA_TCPUMATRIX is defined, so in non-debug builds this function
    *  prints nothing. */

   void Print() const

   {

      TCpuMatrix cpuMatrix = *this;

      TMVA_DNN_PrintTCpuMatrix(cpuMatrix, "CpuMatrix");

   }


private:

   /** Internal set-up helper; declared here and defined outside of this
    *  header. */
   void Initialize();

};


/** Definition of the static vector of ones; there is one instance per
 *  element type AFloat and it starts out empty. */
template <typename AFloat>

std::vector<AFloat> TCpuMatrix<AFloat>::fOnes{};


// Inline Functions.

//______________________________________________________________________________

/** Compute how many consecutive elements a single task should process.
 *
 *  - Up to 1000 elements: everything goes into one chunk (the input is
 *    returned unchanged).
 *  - At least 1000 elements per configured CPU: the elements are divided
 *    evenly by the CPU count reported by TMVA::Config.
 *  - In between: the number of chunks is nElements / 1000 (integer division),
 *    and the chunk size is nElements divided by that number.
 *
 *  \param nElements Total number of elements to distribute.
 *  \return Number of elements per chunk.
 */
template <typename AFloat>
size_t TCpuMatrix<AFloat>::GetNWorkItems(size_t nElements)
{
   const size_t kMinChunk = 1000;
   const size_t nThreads = TMVA::Config::Instance().GetNCpu();

   // Too little work to be worth splitting.
   if (nElements <= kMinChunk)
      return nElements;

   // Enough work to keep every CPU busy with at least kMinChunk elements.
   if (nElements >= nThreads * kMinChunk)
      return nElements / nThreads;

   // Intermediate case: use fewer chunks than CPUs.
   const size_t nChunks = nElements / kMinChunk;
   return nElements / nChunks;
}


//______________________________________________________________________________

/** Apply \p f to every element of this matrix, storing each result back in
 *  place.
 *
 *  The elements are processed in chunks of GetNWorkItems(n) consecutive
 *  entries. If a single chunk covers the whole matrix it is processed
 *  directly in the calling thread; otherwise one task per chunk is handed to
 *  the TMVA executor.
 *
 *  \param f Callable invoked as f(element); its result is assigned to the
 *           element.
 */
template <typename AFloat>
template <typename Function_t>
inline void TCpuMatrix<AFloat>::Map(Function_t &f)
{
   AFloat *elements = GetRawDataPointer();
   const size_t total = GetNoElements();
   const size_t chunk = TCpuMatrix<AFloat>::GetNWorkItems(total);

   // Processes the half-open index range [first, min(first + chunk, total)).
   auto applyChunk = [elements, chunk, total, &f](UInt_t first) {
      const size_t last = std::min(first + chunk, total);
      size_t k = first;
      while (k < last) {
         elements[k] = f(elements[k]);
         ++k;
      }
      return 0;
   };

   // Single chunk: no need to go through the executor.
   if (!(chunk < total)) {
      R__ASSERT(total == chunk);
      applyChunk(0);
      return;
   }

   // One task per chunk start index 0, chunk, 2 * chunk, ...
   GetThreadExecutor().Foreach(applyChunk, ROOT::TSeqI(0, total, chunk));
}


//______________________________________________________________________________

/** Apply \p f to every element of \p A and store the results in the
 *  corresponding elements of this matrix.
 *
 *  Both matrices must hold the same number of elements (asserted). The work
 *  is split into chunks of GetNWorkItems(n) consecutive entries; a single
 *  chunk is processed directly in the calling thread, several chunks are
 *  handed to the TMVA executor as one task each.
 *
 *  \param f Callable invoked as f(element of A).
 *  \param A Matrix providing the input values.
 */
template <typename AFloat>
template <typename Function_t>
inline void TCpuMatrix<AFloat>::MapFrom(Function_t &f, const TCpuMatrix &A)
{
   AFloat *target = GetRawDataPointer();
   const AFloat *source = A.GetRawDataPointer();

   const size_t total = GetNoElements();
   R__ASSERT(total == A.GetNoElements());
   const size_t chunk = TCpuMatrix<AFloat>::GetNWorkItems(total);

   // Processes the half-open index range [first, min(first + chunk, total)).
   auto applyChunk = [target, source, chunk, total, &f](UInt_t first) {
      const size_t last = std::min(first + chunk, total);
      size_t k = first;
      while (k < last) {
         target[k] = f(source[k]);
         ++k;
      }
      return 0;
   };

   // Single chunk: no need to go through the executor.
   if (!(chunk < total)) {
      R__ASSERT(total == chunk);
      applyChunk(0);
      return;
   }

   // One task per chunk start index 0, chunk, 2 * chunk, ...
   GetThreadExecutor().Foreach(applyChunk, ROOT::TSeqI(0, total, chunk));
}

//______________________________________________________________________________

/** Set every element of the matrix to zero.
 *
 *  The elements are visited by their linear buffer index, which is the same
 *  order as iterating column by column and, within a column, row by row.
 */
template <typename AFloat>
void TCpuMatrix<AFloat>::Zero()
{
   const size_t nElements = fNRows * fNCols;
   for (size_t k = 0; k < nElements; ++k)
      fBuffer[k] = 0;
}


} // namespace DNN

} // namespace TMVA


#endif

Config.h

CpuBuffer.h

TMVA_DNN_PrintTCpuMatrix
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
Definition: CpuMatrix.h:67

f
#define f(i)
Definition: RSha256.hxx:104

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:96

TMatrix.h

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66

TMVA::Config::GetThreadExecutor
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition: Config.h:83

TMVA::Config::GetNCpu
UInt_t GetNCpu()
Definition: Config.h:72

TMVA::Config::Instance
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:107

TMVA::DNN::TCpuBuffer
TCpuBuffer.
Definition: CpuBuffer.h:44

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:87

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix(TCpuMatrix &&)=default

TMVA::DNN::TCpuMatrix::fOnes
static std::vector< AFloat > fOnes
Vector filled with ones used for BLAS calls.
Definition: CpuMatrix.h:89

TMVA::DNN::TCpuMatrix::operator=
TCpuMatrix & operator=(const TCpuMatrix &)=default

TMVA::DNN::TCpuMatrix::~TCpuMatrix
~TCpuMatrix()=default

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix(const TCpuBuffer< AFloat > &buffer, size_t m, size_t n)
Construct a m-times-n matrix from the given buffer.

TMVA::DNN::TCpuMatrix::GetNcols
size_t GetNcols() const
Definition: CpuMatrix.h:152

TMVA::DNN::TCpuMatrix::MapFrom
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as maps but takes the input values from the matrix A and writes the results in this matrix.
Definition: CpuMatrix.h:237

TMVA::DNN::TCpuMatrix::fNCols
size_t fNCols
Definition: CpuMatrix.h:95

TMVA::DNN::TCpuMatrix::GetOnePointerSize
static size_t GetOnePointerSize()
Definition: CpuMatrix.h:111

TMVA::DNN::TCpuMatrix::Zero
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:265

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:162

TMVA::DNN::TCpuMatrix::operator()
AFloat & operator()(size_t i, size_t j)
Definition: CpuMatrix.h:158

TMVA::DNN::TCpuMatrix::Print
void Print() const
Definition: CpuMatrix.h:171

TMVA::DNN::TCpuMatrix::GetOnePointer
static const AFloat * GetOnePointer()
Definition: CpuMatrix.h:109

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix(const TMatrixT< AFloat > &)
Construct a TCpuMatrix object by (deeply) copying from a TMatrixT<Double_t> matrix.

TMVA::DNN::TCpuMatrix::operator()
AFloat operator()(size_t i, size_t j) const
Return matrix element in row i and column j.
Definition: CpuMatrix.h:157

TMVA::DNN::TCpuMatrix::GetRawDataPointer
const AFloat * GetRawDataPointer() const
Definition: CpuMatrix.h:163

TMVA::DNN::TCpuMatrix::GetBuffer
const TCpuBuffer< AFloat > & GetBuffer() const
Definition: CpuMatrix.h:106

TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:187

TMVA::DNN::TCpuMatrix::GetSize
size_t GetSize() const
Definition: CpuMatrix.h:154

TMVA::DNN::TCpuMatrix::GetNrows
size_t GetNrows() const
Definition: CpuMatrix.h:151

TMVA::DNN::TCpuMatrix::InitializeOneVector
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:87

TMVA::DNN::TCpuMatrix::operator=
TCpuMatrix & operator=(TCpuMatrix &&)=default

TMVA::DNN::TCpuMatrix::fBuffer
TCpuBuffer< AFloat > fBuffer
The buffer holding the matrix elements in column-major format.
Definition: CpuMatrix.h:92

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix()
Definition: CpuMatrix.h:115

TMVA::DNN::TCpuMatrix::GetBuffer
TCpuBuffer< AFloat > & GetBuffer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
Definition: CpuMatrix.h:105

TMVA::DNN::TCpuMatrix::Map
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition: CpuMatrix.h:208

TMVA::DNN::TCpuMatrix::GetThreadExecutor
static Executor & GetThreadExecutor()
Definition: CpuMatrix.h:165

TMVA::DNN::TCpuMatrix::GetNoElements
size_t GetNoElements() const
Definition: CpuMatrix.h:153

TMVA::DNN::TCpuMatrix::Initialize
void Initialize()
Definition: CpuMatrix.cxx:74

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix(const TCpuMatrix &)=default

TMVA::DNN::TCpuMatrix::fNRows
size_t fNRows
Definition: CpuMatrix.h:96

TMVA::Executor
Base Excutor class.
Definition: Executor.h:35

TMVA::Executor::Foreach
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:110

TMatrixT
TMatrixT.
Definition: TMatrixT.h:39

n
const Int_t n
Definition: legend1.C:16

ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170

TMVA
create variable transformations
Definition: GeneticMinimizer.h:21

m
auto * m
Definition: textangle.C:8