doc/v616/CpuMatrix_8h_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 20/07/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


//////////////////////////////////////////////////////////

// Definition of the TCpuMatrix class used to represent //

// weight and bias matrices in neural nets.             //

//////////////////////////////////////////////////////////


#ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX

#define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX


#ifdef R__USE_IMT

#define DL_USE_MTE  // use MT with tbb

#endif


#include <cstddef>

#include <vector>


#include "TMatrix.h"

#include "TMVA/Config.h"

#include "CpuBuffer.h"

#include <TMVA/Config.h>


// #define DEBUG_TMVA_TCPUMATRIX

#if defined(DEBUG_TMVA_TCPUMATRIX)
/*
 * Debug(!) macro for printing matrices.
 *
 * Only active when DEBUG_TMVA_TCPUMATRIX is defined; otherwise the macro
 * expands to nothing (see the #else branch below).
 *
 * The argument expression `mat` is stringified with `#mat`, so
 * `TMVA_DNN_PrintTCpuMatrix(matA, "Test")` prints a header containing
 * "Test matA(<rows>,<cols>)" followed by one comma-separated line per row.
 * If `mat.GetRawDataPointer()` is null, "matA is null pointer" is printed
 * and the process terminates via exit(1).
 *
 * Both parameters are parenthesized at every use so that expression
 * arguments such as `*ptr` expand with the intended precedence.
 *
 * The expansion is a plain `{ ... }` block (not `do { } while (0)`), so it
 * must not be used as the unbraced body of an `if` that has an `else`.
 *
 * Note: This is a preprocessor macro. It does _not_ respect namespaces, and
 * it requires <iostream> and <cstdlib> to be visible at the point of use.
 *
 * @param mat  Matrix to print; must provide GetRawDataPointer(), GetNrows(),
 *             GetNcols() and operator()(row, col).
 * @param text Label printed in front of the matrix expression.
 */
#define TMVA_DNN_PrintTCpuMatrix(mat, text)                                                                \
   {                                                                                                       \
      auto _dpointer = (mat).GetRawDataPointer();                                                          \
      if (_dpointer == nullptr) {                                                                          \
         std::cout << #mat << " is null pointer" << std::endl;                                             \
         exit(1);                                                                                          \
      }                                                                                                    \
      auto _nrows = (mat).GetNrows();                                                                      \
      auto _ncols = (mat).GetNcols();                                                                      \
      std::cout << "---------------------" << (text) << " " << #mat << "(" << _nrows << "," << _ncols      \
                << ")"                                                                                     \
                << "--------------------" << std::endl;                                                    \
      for (size_t _i = 0; _i < _nrows; _i++) {                                                             \
         for (size_t _j = 0; _j < _ncols; _j++) {                                                          \
            std::cout << (mat)(_i, _j);                                                                    \
            if (_j < _ncols - 1) std::cout << ",";                                                         \
         }                                                                                                 \
         std::cout << std::endl;                                                                           \
      }                                                                                                    \
   }
#else
/* Debug printing disabled: the macro swallows its arguments and expands to nothing. */
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
#endif


namespace TMVA

{

namespace DNN

{


/** The TCpuMatrix class.
 *
 * Matrix class for multi-threaded CPU architectures. Uses the TCpuBuffer
 * class to store the matrices in column-major format for compatibility with
 * BLAS. Provides Map and MapFrom member functions to simplify the application of
 * activation functions and derivatives to matrices.
 *
 * Copying and assignment of TCpuMatrix objects only performs shallow copies, i.e.
 * copying is fast and the resulting objects share the element data.
 *
 * \tparam AFloat The floating point type used to represent the matrix elements.
 */

//______________________________________________________________________________

template<typename AFloat>

class TCpuMatrix

{

private:

   static std::vector<AFloat> fOnes;  ///< Vector filled with ones used for BLAS calls.


   TCpuBuffer<AFloat> fBuffer; ///< The buffer holding the matrix elements

                               ///< in column-major format.

   /// Number of columns (returned by GetNcols()).
   size_t     fNCols;

   /// Number of rows (returned by GetNrows()); also the column stride used by operator().
   size_t     fNRows;


public:


   /** Returns pointer to a vector holding only ones with a guaranteed length
    *  of the number of columns of every instantiated CpuMatrix object. */

   static const AFloat * GetOnePointer() {return fOnes.data();}


   /** Returns the current number of elements stored in the shared ones vector. */

   static size_t GetOnePointerSize() { return fOnes.size(); }


   /** Defined in CpuMatrix.cxx (not visible in this header).
    *  NOTE(review): presumably ensures the shared ones vector holds at least
    *  \p n entries -- confirm against the implementation. */

   static void InitializeOneVector( size_t n);


   /** Construct matrix and allocate space for its elements. */

   TCpuMatrix(size_t nRows, size_t nCols);

   /** Construct a TCpuMatrix object by (deeply) copying from a
    *  TMatrixT<AFloat> matrix. */

   TCpuMatrix(const TMatrixT<AFloat> &);

   /** Construct a m-times-n matrix from the given buffer. The size must of
    *  course match. */

   TCpuMatrix(const TCpuBuffer<AFloat> &buffer, size_t m, size_t n);


   // N.B. the defaulted copy/move operations perform a shallow copy (NOT a deep
   // one): the copies share the element data.

   TCpuMatrix(const TCpuMatrix  &)             = default;

   TCpuMatrix(      TCpuMatrix &&)             = default;

   TCpuMatrix & operator=(const TCpuMatrix &)  = default;

   TCpuMatrix & operator=(TCpuMatrix &&)       = default;

   ~TCpuMatrix()                               = default;


   /** Set every element of the matrix to zero. The dimensions are unchanged. */

   void Zero();


   /** Convert to a TMatrixT<AFloat> object. Performs a deep copy of the matrix
    *  elements. */

   operator TMatrixT<AFloat>() const;


   /** Map the given function over the matrix elements, in place. The elements
    *  are split into chunks of GetNWorkItems() elements; when there is more
    *  than one chunk and DL_USE_MTE is defined the chunks are dispatched
    *  through TThreadExecutor, otherwise they are processed serially. */

   template <typename Function_t>

   void Map(Function_t &f);


   /** Same as Map but takes the input values from the matrix \p A and writes
    *  the results in this matrix. Asserts that \p A has the same number of
    *  elements as this matrix. */

   template <typename Function_t>

   void MapFrom(Function_t &f, const TCpuMatrix & A);


   /** Number of rows. */
   size_t GetNrows() const {return fNRows;}

   /** Number of columns. */
   size_t GetNcols() const {return fNCols;}

   /** Total number of elements (rows times columns). */
   size_t GetNoElements() const {return fNRows * fNCols;}


   /** Return matrix element in row \p i and column \p j. The element is read
    *  from buffer position j * fNRows + i (column-major); no bounds check is
    *  performed here. */

   AFloat   operator()(size_t i, size_t j) const {return fBuffer[j * fNRows + i];}

   AFloat & operator()(size_t i, size_t j)       {return fBuffer[j * fNRows + i];}


   /** Return raw pointer to the elements stored contiguously in column-major
    *  order. */

   AFloat *       GetRawDataPointer()        {return fBuffer;}

   const AFloat * GetRawDataPointer()  const {return fBuffer;}


   /** Returns the thread executor owned by the TMVA::Config singleton. */

   static ROOT::TThreadExecutor &GetThreadExecutor() { return TMVA::Config::Instance().GetThreadExecutor(); }


   /** Returns the number of elements each work item (chunk) should process
    *  for a matrix with \p nelements elements. */

   static size_t GetNWorkItems(size_t nelements);


   /** Print the matrix through TMVA_DNN_PrintTCpuMatrix. That macro expands
    *  to nothing unless DEBUG_TMVA_TCPUMATRIX is defined, so by default this
    *  function produces no output. */

   void Print() const {

      // Shallow copy; gives the macro a plain, non-const identifier to work on.
      TCpuMatrix cpuMatrix = *this;

      TMVA_DNN_PrintTCpuMatrix(cpuMatrix,"CpuMatrix");

   }


private:


   /** Defined in CpuMatrix.cxx (not visible in this header). */

   void Initialize();


};


// Out-of-class definition of the static ones vector shared by all
// TCpuMatrix<AFloat> objects of the same AFloat; it starts out empty.
template<typename AFloat>

std::vector<AFloat> TCpuMatrix<AFloat>::fOnes {};


// Inline Functions.

//______________________________________________________________________________

template<typename AFloat>
size_t TCpuMatrix<AFloat>::GetNWorkItems(size_t nElements)
{
   // Chunk-size policy used by Map/MapFrom:
   //  - up to kMinChunk elements: a single chunk holding everything;
   //  - enough elements to give every CPU at least kMinChunk: one chunk per CPU;
   //  - in between: as many chunks of roughly kMinChunk elements as fit.
   const size_t kMinChunk = 1000;
   const size_t nThreads = TMVA::Config::Instance().GetNCpu();

   if (nElements <= kMinChunk)
      return nElements;

   if (nElements >= nThreads * kMinChunk)
      return nElements / nThreads;

   // Here kMinChunk < nElements, so nChunks is at least 1.
   const size_t nChunks = nElements / kMinChunk;
   return nElements / nChunks;
}


//______________________________________________________________________________

template<typename AFloat>
template<typename Function_t>
inline void TCpuMatrix<AFloat>::Map(Function_t &f)
{
   // Applies f to every element in place, working through the raw buffer in
   // chunks of `nsteps` consecutive elements.
   AFloat *elements = GetRawDataPointer();
   size_t nelements = GetNoElements();
   size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);

   // Handles the chunk that starts at `first`; the final chunk is clipped to
   // the end of the buffer. The int return value is always 0.
   auto processChunk = [elements, &nsteps, &nelements, &f](UInt_t first)
   {
      const size_t last = std::min(first + nsteps, nelements);
      size_t k = first;
      while (k < last) {
         elements[k] = f(elements[k]);
         ++k;
      }
      return 0;
   };

   // Single chunk: run it directly on the calling thread.
   if (!(nsteps < nelements)) {
      R__ASSERT(nelements == nsteps);
      processChunk(0);
      return;
   }

   // Several chunks: hand them to the thread executor when multithreading is
   // compiled in, otherwise walk them one after another.
#ifdef DL_USE_MTE
   TMVA::Config::Instance().GetThreadExecutor().Foreach(processChunk, ROOT::TSeqI(0,nelements,nsteps));
#else
   for (size_t first = 0; first < nelements; first += nsteps) {
      processChunk(first);
   }
#endif
}


//______________________________________________________________________________

template<typename AFloat>
template<typename Function_t>
inline void TCpuMatrix<AFloat>::MapFrom(Function_t &f, const TCpuMatrix &A)
{
   // Writes f(A[k]) into this matrix for every buffer position k, working in
   // chunks of `nsteps` consecutive elements. A must hold as many elements as
   // this matrix (asserted below).
   AFloat *target = GetRawDataPointer();
   const AFloat *source = A.GetRawDataPointer();

   size_t nelements = GetNoElements();
   R__ASSERT(nelements == A.GetNoElements() );
   size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);

   // Handles the chunk that starts at `first`; the final chunk is clipped to
   // the end of the buffer. The int return value is always 0.
   auto processChunk = [&target, &source, &nsteps, &nelements, &f](UInt_t first)
   {
      const size_t last = std::min(first + nsteps, nelements);
      size_t k = first;
      while (k < last) {
         target[k] = f(source[k]);
         ++k;
      }
      return 0;
   };

   // Single chunk: run it directly on the calling thread.
   if (!(nsteps < nelements)) {
      R__ASSERT(nelements == nsteps);
      processChunk(0);
      return;
   }

   // Several chunks: hand them to the thread executor when multithreading is
   // compiled in, otherwise walk them one after another.
#ifdef DL_USE_MTE
   TMVA::Config::Instance().GetThreadExecutor().Foreach(processChunk, ROOT::TSeqI(0,nelements,nsteps));
#else
   for (size_t first = 0; first < nelements; first += nsteps) {
      processChunk(first);
   }
#endif
}

//______________________________________________________________________________

template<typename AFloat>
void TCpuMatrix<AFloat>::Zero()
{
   // Element (i, j) lives at buffer position j * fNRows + i, so visiting the
   // columns in order and the rows within each column touches positions
   // 0, 1, ..., fNRows * fNCols - 1 in sequence; walk them with one flat index.
   const size_t count = fNRows * fNCols;
   for (size_t k = 0; k < count; ++k) {
      fBuffer[k] = 0;
   }
}


} // namespace DNN

} // namespace TMVA


#endif

Config.h

CpuBuffer.h

TMVA_DNN_PrintTCpuMatrix
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
Definition: CpuMatrix.h:66

f
#define f(i)
Definition: RSha256.hxx:104

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:96

TMatrix.h

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66

ROOT::TThreadExecutor
This class provides a simple interface to execute the same task multiple times in parallel,...
Definition: TThreadExecutor.hxx:35

ROOT::TThreadExecutor::Foreach
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
Definition: TThreadExecutor.hxx:118

TMVA::Config::GetNCpu
UInt_t GetNCpu()
Definition: Config.h:76

TMVA::Config::Instance
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:108

TMVA::Config::GetThreadExecutor
ROOT::TThreadExecutor & GetThreadExecutor()
Definition: Config.h:82

TMVA::DNN::TCpuBuffer
TCpuBuffer.
Definition: CpuBuffer.h:44

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:89

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix(TCpuMatrix &&)=default

TMVA::DNN::TCpuMatrix::fOnes
static std::vector< AFloat > fOnes
Vector filled with ones used for BLAS calls.
Definition: CpuMatrix.h:91

TMVA::DNN::TCpuMatrix::operator=
TCpuMatrix & operator=(const TCpuMatrix &)=default

TMVA::DNN::TCpuMatrix::~TCpuMatrix
~TCpuMatrix()=default

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix(const TCpuBuffer< AFloat > &buffer, size_t m, size_t n)
Construct a m-times-n matrix from the given buffer.

TMVA::DNN::TCpuMatrix::GetNcols
size_t GetNcols() const
Definition: CpuMatrix.h:143

TMVA::DNN::TCpuMatrix::MapFrom
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as Map but takes the input values from the matrix A and writes the results in this matrix.
Definition: CpuMatrix.h:233

TMVA::DNN::TCpuMatrix::fNCols
size_t fNCols
Definition: CpuMatrix.h:95

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix(size_t nRows, size_t nCols)
Construct matrix and allocate space for its elements.
Definition: CpuMatrix.cxx:23

TMVA::DNN::TCpuMatrix::GetOnePointerSize
static size_t GetOnePointerSize()
Definition: CpuMatrix.h:104

TMVA::DNN::TCpuMatrix::Zero
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:265

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:152

TMVA::DNN::TCpuMatrix::operator()
AFloat & operator()(size_t i, size_t j)
Definition: CpuMatrix.h:148

TMVA::DNN::TCpuMatrix::Print
void Print() const
Definition: CpuMatrix.h:161

TMVA::DNN::TCpuMatrix::GetOnePointer
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of every instantiated CpuMatrix object.
Definition: CpuMatrix.h:102

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix(const TMatrixT< AFloat > &)
Construct a TCpuMatrix object by (deeply) copying from a TMatrixT<AFloat> matrix.

TMVA::DNN::TCpuMatrix::operator()
AFloat operator()(size_t i, size_t j) const
Return matrix element in row i and column j.
Definition: CpuMatrix.h:147

TMVA::DNN::TCpuMatrix::GetRawDataPointer
const AFloat * GetRawDataPointer() const
Definition: CpuMatrix.h:153

TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:180

TMVA::DNN::TCpuMatrix::GetThreadExecutor
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:155

TMVA::DNN::TCpuMatrix::GetNrows
size_t GetNrows() const
Definition: CpuMatrix.h:142

TMVA::DNN::TCpuMatrix::InitializeOneVector
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:87

TMVA::DNN::TCpuMatrix::operator=
TCpuMatrix & operator=(TCpuMatrix &&)=default

TMVA::DNN::TCpuMatrix::fBuffer
TCpuBuffer< AFloat > fBuffer
The buffer holding the matrix elements in column-major format.
Definition: CpuMatrix.h:93

TMVA::DNN::TCpuMatrix::Map
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition: CpuMatrix.h:201

TMVA::DNN::TCpuMatrix::GetNoElements
size_t GetNoElements() const
Definition: CpuMatrix.h:144

TMVA::DNN::TCpuMatrix::Initialize
void Initialize()
Definition: CpuMatrix.cxx:74

TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix(const TCpuMatrix &)=default

TMVA::DNN::TCpuMatrix::fNRows
size_t fNRows
Definition: CpuMatrix.h:96

TMatrixT
TMatrixT.
Definition: TMatrixT.h:39

n
const Int_t n
Definition: legend1.C:16

ApplicationClassificationKeras.data
data
Definition: ApplicationClassificationKeras.py:17

ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170

TMVA
Abstract ClassifierFactory template that handles arbitrary types.
Definition: GeneticMinimizer.h:21

m
auto * m
Definition: textangle.C:8