doc/v626/Cpu_2Arithmetic_8hxx_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 20/07/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


////////////////////////////////////////////////////////////

//  Implementation of Helper arithmetic functions for the //

// multi-threaded CPU implementation of DNNs.             //

////////////////////////////////////////////////////////////


#include "TMVA/DNN/Architectures/Cpu.h"


#ifdef R__HAS_TMVACPU

#include "TMVA/DNN/Architectures/Cpu/Blas.h"

#else

#include "TMVA/DNN/Architectures/Reference.h"

#include "TVectorT.h"

#endif


#if defined(__GNUC__)

#pragma GCC diagnostic push

#pragma GCC diagnostic ignored "-Wshadow"


//#include "tbb/tbb.h"


#pragma GCC diagnostic pop

#endif


namespace TMVA

{

namespace DNN

{


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::Multiply(TCpuMatrix<AReal> &C,
                           const TCpuMatrix<AReal> &A,
                           const TCpuMatrix<AReal> &B)
{
   // Plain matrix product C = A * B, with A of shape (m x k), B of shape
   // (k x n) and C of shape (m x n).
   int m = (int) A.GetNrows();
   int k = (int) A.GetNcols();
   int n = (int) B.GetNcols();

   // Reject incompatible shapes before any buffer is touched.
   R__ASSERT((int) C.GetNrows() == m);
   R__ASSERT((int) C.GetNcols() == n);
   R__ASSERT((int) B.GetNrows() == k);

#ifdef R__HAS_TMVACPU
   // BLAS path: neither operand is transposed; the scale factors 1 and 0
   // make Gemm overwrite C with the product.
   char opA = 'N';
   char opB = 'N';

   AReal scaleProduct = 1.0;
   AReal scaleC       = 0.0;

   const AReal *rawA = A.GetRawDataPointer();
   const AReal *rawB = B.GetRawDataPointer();
   AReal       *rawC = C.GetRawDataPointer();

   // Leading dimensions are the row counts of A, B and C respectively.
   ::TMVA::DNN::Blas::Gemm(&opA, &opB, &m, &n, &k, &scaleProduct,
                           rawA, &m, rawB, &k, &scaleC, rawC, &m);
#else
   // Fallback path: compute the product into a TMatrixT and assign it to C.
   TMatrixT<AReal> product(C.GetNrows(), C.GetNcols());
   product.Mult(A,B);
   C = product;
#endif
}


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::TransposeMultiply(TCpuMatrix<AReal> &C,
                                     const TCpuMatrix<AReal> &A,
                                     const TCpuMatrix<AReal> &B,
                                     AReal alpha, AReal beta)
{
   // Computes C = alpha * A^T * B + beta * C, where A is (k x m), B is (k x n)
   // and C is (m x n).
   int m = (int) A.GetNcols();
   int k = (int) A.GetNrows();
   int n = (int) B.GetNcols();

   // Validate the shapes on every code path (as Multiply does), not only when
   // the BLAS implementation is compiled in.
   R__ASSERT((int) C.GetNrows() == m);
   R__ASSERT((int) C.GetNcols() == n);
   R__ASSERT((int) B.GetNrows() == k);

#ifdef R__HAS_TMVACPU
   char transa = 'T';
   char transb = 'N';

   const AReal *APointer = A.GetRawDataPointer();
   const AReal *BPointer = B.GetRawDataPointer();
         AReal *CPointer = C.GetRawDataPointer();

   // A and B both have k rows, hence the leading dimension k for both;
   // C has m rows.
   ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
                           APointer, &k, BPointer, &k, &beta, CPointer, &m);
#else
   TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
   tmp.TMult(A, B);   // tmp = A^T * B
   tmp = alpha * tmp;
   // The previous content of C only contributes when beta is non-zero.
   if (beta != 0.0) {
      TMatrixT<AReal> tmp0(C);
      tmp = tmp + beta * tmp0;
   }
   C = tmp;
#endif
}


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::Hadamard(TCpuMatrix<AReal> &B,
                            const TCpuMatrix<AReal> &A)
{
   // In-place element-wise product over the raw buffers: B[i] <- B[i] * A[i].
   // Both matrices must hold the same number of elements.
   const AReal *factors = A.GetRawDataPointer();
   AReal       *target  = B.GetRawDataPointer();

   size_t nElements =  A.GetNoElements();
   R__ASSERT(B.GetNoElements() == nElements);

   // Number of consecutive elements processed by a single work item.
   size_t chunkSize = TCpuMatrix<AReal>::GetNWorkItems(nElements);

   // Multiplies at most chunkSize elements starting at chunkStart, stopping
   // early when the end of the buffer is reached. The returned 0 is not used
   // in this function.
   auto multiplyChunk = [&](UInt_t chunkStart)
   {
      size_t idx = chunkStart;
      for (size_t done = 0; done < chunkSize && idx < nElements; ++done, ++idx) {
         target[idx] *= factors[idx];
      }
      return 0;
   };

   // A single chunk covers the whole buffer: no need to split the work.
   if (chunkSize >= nElements) {
      multiplyChunk(0);
      return;
   }

#ifdef DL_USE_MTE
   B.GetThreadExecutor().Foreach(multiplyChunk, ROOT::TSeqI(0,nElements,chunkSize));
#else
   for (size_t start = 0; start < nElements; start += chunkSize)
      multiplyChunk(start);
#endif
}


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::Hadamard(TCpuTensor<AReal> &B,
                            const TCpuTensor<AReal> &A)
{
   // Tensor variant of the in-place element-wise product over the raw
   // buffers: B[i] <- B[i] * A[i]. Both tensors must hold the same number of
   // elements.
   const AReal *factors = A.GetRawDataPointer();
   AReal       *target  = B.GetRawDataPointer();

   size_t nElements =  A.GetNoElements();
   R__ASSERT(B.GetNoElements() == nElements);

   // Number of consecutive elements processed by a single work item.
   size_t chunkSize = TCpuMatrix<AReal>::GetNWorkItems(nElements);

   // Multiplies at most chunkSize elements starting at chunkStart, stopping
   // early when the end of the buffer is reached. The returned 0 is not used
   // in this function.
   auto multiplyChunk = [&](UInt_t chunkStart)
   {
      size_t idx = chunkStart;
      for (size_t done = 0; done < chunkSize && idx < nElements; ++done, ++idx) {
         target[idx] *= factors[idx];
      }
      return 0;
   };

   // A single chunk covers the whole buffer: no need to split the work.
   if (chunkSize >= nElements) {
      multiplyChunk(0);
      return;
   }

#ifdef DL_USE_MTE
   TMVA::Config::Instance().GetThreadExecutor().Foreach(multiplyChunk, ROOT::TSeqI(0,nElements,chunkSize));
#else
   for (size_t start = 0; start < nElements; start += chunkSize)
      multiplyChunk(start);
#endif
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// \brief Element-wise comparison of two matrices within an absolute tolerance.
/// \tparam AReal An architecture-specific floating point number type.
/// \param A The first matrix.
/// \param B The second matrix.
/// \param epsilon Largest absolute difference between corresponding elements that still counts as equal.
/// \return false as soon as a pair of elements differs by more than epsilon, true otherwise.
///
/// Calls Fatal() when the two matrices do not have the same number of rows and columns.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<typename AReal>
bool TCpu<AReal>::AlmostEquals(const TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B, double epsilon)
{
   const bool sameShape = (A.GetNrows() == B.GetNrows()) && (A.GetNcols() == B.GetNcols());
   if (!sameShape) {
      Fatal("AlmostEquals", "The passed matrices have unequal shapes.");
   }

   const AReal *lhs = A.GetRawDataPointer();
   const AReal *rhs = B.GetRawDataPointer();
   const size_t count = A.GetNoElements();

   // Advance while the current pair is within tolerance; the scan stops at
   // the first pair whose absolute difference exceeds epsilon.
   size_t pos = 0;
   while (pos < count && !(fabs(lhs[pos] - rhs[pos]) > epsilon)) {
      ++pos;
   }

   // Reaching the end means no pair exceeded the tolerance.
   return pos == count;
}


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::SumColumns(TCpuMatrix<AReal> &B,
                              const TCpuMatrix<AReal> &A,
                              AReal alpha, AReal beta)
{
   // Sums each column of the (m x n) matrix A and accumulates the result into
   // the first n elements of B:
   //    B[j] = alpha * sum_i A(i,j) + beta * B[j]

   int m = (int) A.GetNrows();
   int n = (int) A.GetNcols();

   // Both code paths below write n values through B's raw data pointer, so B
   // must provide at least n elements. R__ASSERT is used (as in the other
   // routines of this file) so the check is not compiled out with NDEBUG.
   R__ASSERT((int) B.GetNoElements() >= n);

#ifdef R__HAS_TMVACPU
   int inc = 1;
   char   trans   = 'T';

   const AReal * APointer = A.GetRawDataPointer();
         AReal * BPointer = B.GetRawDataPointer();

   // compute B = alpha * A^T * ones + beta * B
   // NOTE(review): assumes the shared ones buffer holds at least m entries -
   // confirm against TCpuMatrix.

   ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
                           TCpuMatrix<AReal>::GetOnePointer(), &inc,
                           &beta, BPointer, &inc);
#else
   TMatrixT<AReal> tA(A);
   tA.T();
   // NOTE(review): assumes the shared ones buffer holds at least m entries -
   // confirm against TCpuMatrix.
   TVectorT<AReal> ones(m, TCpuMatrix<AReal>::GetOnePointer());
   TVectorT<AReal> tmp(n, B.GetRawDataPointer());
   // Debug-only sanity check: this path expects B to be a row or column vector.
   assert(B.GetNrows() == 1 || B.GetNcols() == 1);
   tmp = alpha * tA * ones + beta * tmp;
   // copy result buffer in B matrix
   std::copy(tmp.GetMatrixArray(), tmp.GetMatrixArray() + n, B.GetRawDataPointer());
#endif
}


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::ScaleAdd(TCpuMatrix<AReal> &B,
                            const TCpuMatrix<AReal> &A,
                            AReal alpha)
{
   // Accumulates a scaled copy of A into B:  B <- B + alpha * A.
#ifdef R__HAS_TMVACPU
   // Treat both matrices as flat vectors with one entry per element of A.
   int length = (int) (A.GetNrows() * A.GetNcols());
   int unitStride = 1;

   const AReal *source = A.GetRawDataPointer();
   AReal *destination  = B.GetRawDataPointer();

   ::TMVA::DNN::Blas::Axpy(&length, &alpha, source, &unitStride, destination, &unitStride);
#else
   // Fallback path: delegate to the reference implementation on a TMatrixT
   // copy of B, then assign the result back.
   TMatrixT<AReal> accumulated(B);
   TReference<AReal>::ScaleAdd(accumulated, A, alpha);
   B = accumulated;
#endif
}


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::Copy(TCpuMatrix<AReal> &B,
                        const TCpuMatrix<AReal> &A)
{
   // Copies the elements of A into B by mapping the identity function from
   // A onto B.
   auto identity = [](AReal value) { return value; };
   B.MapFrom(identity, A);
}


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::ScaleAdd(TCpuTensor<AReal> &B,
                            const TCpuTensor<AReal> &A,
                            AReal alpha)
{
   // Applies the matrix version of ScaleAdd to each slice along the first
   // dimension of the tensors.
   // should re-implemented at tensor level
   const size_t nSlices = B.GetFirstSize();
   for (size_t slice = 0; slice < nSlices; ++slice) {
      // A named matrix is needed because ScaleAdd takes its target by
      // non-const reference.
      TCpuMatrix<AReal> targetSlice = B.At(slice).GetMatrix();
      ScaleAdd(targetSlice, A.At(slice).GetMatrix(), alpha);
   }
}


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::Copy(TCpuTensor<AReal> &B,
                            const TCpuTensor<AReal> &A)
{
   // Copies the elements of tensor A into tensor B by mapping the identity
   // function from A onto B.
   auto identity = [](AReal value) { return value; };
   B.MapFrom(identity, A);
}


//____________________________________________________________________________

template <typename AReal>
void TCpu<AReal>::ConstAdd(TCpuMatrix<AReal> &A, AReal beta)
{
   // In-place shift: every element of A becomes (element + beta).
   auto addConstant = [beta](AReal value) { return value + beta; };
   A.Map(addConstant);
}


//____________________________________________________________________________

template <typename AReal>
void TCpu<AReal>::ConstMult(TCpuMatrix<AReal> &A, AReal beta)
{
   // In-place scaling: every element of A becomes (element * beta).
   auto scaleByConstant = [beta](AReal value) { return value * beta; };
   A.Map(scaleByConstant);
}


//____________________________________________________________________________

template <typename AReal>
void TCpu<AReal>::ReciprocalElementWise(TCpuMatrix<AReal> &A)
{
   // In-place reciprocal: every element of A becomes (1 / element).
   // The numerator is the double literal 1.0, as in the original expression.
   auto reciprocal = [](AReal value) { return 1.0 / value; };
   A.Map(reciprocal);
}


//____________________________________________________________________________

template <typename AReal>
void TCpu<AReal>::SquareElementWise(TCpuMatrix<AReal> &A)
{
   // In-place square: every element of A becomes (element * element).
   auto square = [](AReal value) { return value * value; };
   A.Map(square);
}


//____________________________________________________________________________

template <typename AReal>
void TCpu<AReal>::SqrtElementWise(TCpuMatrix<AReal> &A)
{
   // In-place square root: every element of A becomes sqrt(element).
   auto squareRoot = [](AReal value) { return sqrt(value); };
   A.Map(squareRoot);
}


/// Adam updates

//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::AdamUpdate(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & M, const TCpuMatrix<AReal> & V, AReal alpha, AReal eps)
{
   // ADAM weight update, applied element by element over the raw buffers:
   //    Weight = Weight - alpha * M / (sqrt(V) + epsilon)
   // The loop runs over the elements of A and reads M and V at the same
   // indices.
   AReal *weights             = A.GetRawDataPointer();
   const AReal *firstMoment   = M.GetRawDataPointer();
   const AReal *secondMoment  = V.GetRawDataPointer();

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      weights[i] -= alpha * firstMoment[i]/( sqrt(secondMoment[i]) + eps);
   }
}


//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::AdamUpdateFirstMom(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & B, AReal beta)
{
   // First-moment update for ADAM, with A holding the moment and B the
   // weight gradients:
   //    Mt = beta1 * Mt-1 + (1-beta1) * WeightGradients
   AReal *moment          = A.GetRawDataPointer();
   const AReal *gradients = B.GetRawDataPointer();

   // Weight given to the new gradient; same expression as used per element.
   const auto gradientWeight = (1.-beta);

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      moment[i] = beta * moment[i] + gradientWeight * gradients[i];
   }
}

//____________________________________________________________________________

template<typename AReal>
void TCpu<AReal>::AdamUpdateSecondMom(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & B, AReal beta)
{
   // Second-moment update for ADAM, with A holding the moment and B the
   // weight gradients:
   //    Vt = beta2 * Vt-1 + (1-beta2) * WeightGradients^2
   AReal *moment          = A.GetRawDataPointer();
   const AReal *gradients = B.GetRawDataPointer();

   // Weight given to the squared gradient; same expression as used per element.
   const auto gradientWeight = (1.-beta);

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      moment[i] = beta * moment[i] + gradientWeight * gradients[i] * gradients[i];
   }
}


} // DNN

} // TMVA

Blas.h

Cpu.h

b
#define b(i)
Definition RSha256.hxx:100

f
#define f(i)
Definition RSha256.hxx:104

a
#define a(i)
Definition RSha256.hxx:99

Reference.h

R__ASSERT
#define R__ASSERT(e)
Definition TError.h:118

Fatal
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Definition TError.cxx:245

TVectorT.h

AReal

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition TSeq.hxx:66

TMVA::Config::GetThreadExecutor
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition Config.h:81

TMVA::Config::Instance
static Config & Instance()
static function: returns TMVA instance
Definition Config.cxx:98

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition CpuMatrix.h:86

TMVA::DNN::TCpuMatrix::GetNcols
size_t GetNcols() const
Definition CpuMatrix.h:156

TMVA::DNN::TCpuMatrix::MapFrom
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as maps but takes the input values from the matrix A and writes the results in this matrix.
Definition CpuMatrix.h:241

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition CpuMatrix.h:166

TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition CpuMatrix.h:191

TMVA::DNN::TCpuMatrix::GetNrows
size_t GetNrows() const
Definition CpuMatrix.h:155

TMVA::DNN::TCpuMatrix::Map
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition CpuMatrix.h:212

TMVA::DNN::TCpuMatrix::GetThreadExecutor
static Executor & GetThreadExecutor()
Definition CpuMatrix.h:169

TMVA::DNN::TCpuMatrix::GetNoElements
size_t GetNoElements() const
Definition CpuMatrix.h:157

TMVA::DNN::TCpuTensor
Definition CpuTensor.h:40

TMVA::DNN::TCpuTensor::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition CpuTensor.h:141

TMVA::DNN::TCpuTensor::GetNoElements
size_t GetNoElements() const
Definition CpuTensor.h:149

TMVA::DNN::TCpuTensor::MapFrom
void MapFrom(Function_t &f, const TCpuTensor< AFloat > &A)
Same as maps but takes the input values from the tensor A and writes the results in this tensor.
Definition CpuTensor.h:354

TMVA::DNN::TCpuTensor::GetFirstSize
size_t GetFirstSize() const
Definition CpuTensor.h:155

TMVA::DNN::TCpuTensor::At
TCpuTensor< AFloat > At(size_t i)
Definition CpuTensor.h:221

TMVA::DNN::TCpu::TransposeMultiply
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of the transposed matrix A^T and the matrix B, with the result being written into C (C = alpha * A^T * B + beta * C).
Definition Arithmetic.hxx:77

TMVA::DNN::TCpu::ScaleAdd
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements of matrix B, scaled by beta, to the elements of matrix A.
Definition Arithmetic.hxx:248

TMVA::DNN::TCpu::ConstAdd
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition Arithmetic.hxx:302

TMVA::DNN::TCpu::SumColumns
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum the columns of the (m x n) matrix A and write the results into the first n elements of B.
Definition Arithmetic.hxx:212

TMVA::DNN::TCpu::AlmostEquals
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
Definition Arithmetic.hxx:194

TMVA::DNN::TCpu::Hadamard
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
Definition Arithmetic.hxx:152

TMVA::DNN::TCpu::SqrtElementWise
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
Definition Arithmetic.hxx:334

TMVA::DNN::TCpu::Multiply
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
Definition Arithmetic.hxx:42

TMVA::DNN::TCpu::AdamUpdateSecondMom
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition Arithmetic.hxx:369

TMVA::DNN::TCpu::Copy
static void Copy(Matrix_t &B, const Matrix_t &A)
Definition Arithmetic.hxx:269

TMVA::DNN::TCpu::SquareElementWise
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
Definition Arithmetic.hxx:326

TMVA::DNN::TCpu::AdamUpdateFirstMom
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition Arithmetic.hxx:357

TMVA::DNN::TCpu::ConstMult
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
Definition Arithmetic.hxx:310

TMVA::DNN::TCpu::ReciprocalElementWise
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
Definition Arithmetic.hxx:318

TMVA::DNN::TCpu::AdamUpdate
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
Definition Arithmetic.hxx:343

TMVA::DNN::TReference::ScaleAdd
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements of matrix B, scaled by beta, to the elements of matrix A.
Definition Propagation.hxx:76

TMVA::Executor::Foreach
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition Executor.h:111

TMatrixT
TMatrixT.
Definition TMatrixT.h:39

TMatrixT::T
TMatrixT< Element > & T()
Definition TMatrixT.h:150

TMatrixT::TMult
void TMult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
Create a matrix C such that C = A' * B.
Definition TMatrixT.cxx:853

TMatrixT::Mult
void Mult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
General matrix multiplication. Create a matrix C such that C = A * B.
Definition TMatrixT.cxx:649

TVectorT
TVectorT.
Definition TVectorT.h:27

TVectorT::GetMatrixArray
Element * GetMatrixArray()
Definition TVectorT.h:78

int

unsigned int

y
Double_t y[n]
Definition legend1.C:17

x
Double_t x[n]
Definition legend1.C:17

n
const Int_t n
Definition legend1.C:16

TMVA::DNN::Blas::Axpy
void Axpy(const int *n, const AReal *alpha, const AReal *x, const int *incx, AReal *y, const int *incy)
Add the vector x scaled by alpha to the vector y.

TMVA::DNN::Blas::Gemm
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const AReal *alpha, const AReal *A, const int *lda, const AReal *B, const int *ldb, const AReal *beta, AReal *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.

TMVA::DNN::Blas::Gemv
void Gemv(const char *trans, const int *m, const int *n, const AReal *alpha, const AReal *A, const int *lda, const AReal *x, const int *incx, const AReal *beta, AReal *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.

TMVA
create variable transformations
Definition GeneticMinimizer.h:22

v
@ v
Definition rootcling_impl.cxx:3670

m
auto * m
Definition textangle.C:8

epsilon
REAL epsilon
Definition triangle.c:618