doc/v616/Cpu_2Arithmetic_8cxx_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 20/07/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


////////////////////////////////////////////////////////////

//  Implementation of Helper arithmetic functions for the //

// multi-threaded CPU implementation of DNNs.             //

////////////////////////////////////////////////////////////


#include "TMVA/DNN/Architectures/Cpu.h"

#include "TMVA/DNN/Architectures/Cpu/Blas.h"


#pragma GCC diagnostic push

#pragma GCC diagnostic ignored "-Wshadow"


#include "tbb/tbb.h"


#pragma GCC diagnostic pop


namespace TMVA

{

namespace DNN

{


//____________________________________________________________________________

/// \brief Computes the matrix product of A and B and stores it in C.
///
/// The shapes are verified with R__ASSERT before any work is done: C must have
/// as many rows as A and as many columns as B, and B must have as many rows as
/// A has columns. The product is delegated to a single Blas::Gemm call with
/// neither operand transposed, alpha = 1 and beta = 0. The row counts of A, B
/// and C are passed as the leading dimensions.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param C Matrix receiving the product.
/// \param A Left factor.
/// \param B Right factor.
template<typename Real_t>
void TCpu<Real_t>::Multiply(TCpuMatrix<Real_t> &C,
                            const TCpuMatrix<Real_t> &A,
                            const TCpuMatrix<Real_t> &B)
{
    // m x k times k x n gives m x n.
    const int m = static_cast<int>(A.GetNrows());
    const int k = static_cast<int>(A.GetNcols());
    const int n = static_cast<int>(B.GetNcols());

    R__ASSERT((int) C.GetNrows() == m);
    R__ASSERT((int) C.GetNcols() == n);
    R__ASSERT((int) B.GetNrows() == k);

    const char noTransA = 'N';
    const char noTransB = 'N';

    const Real_t scaleProduct = 1.0;
    const Real_t scaleTarget  = 0.0;

    const Real_t *rawA = A.GetRawDataPointer();
    const Real_t *rawB = B.GetRawDataPointer();
    Real_t       *rawC = C.GetRawDataPointer();

    ::TMVA::DNN::Blas::Gemm(&noTransA, &noTransB, &m, &n, &k, &scaleProduct,
                            rawA, &m,
                            rawB, &k,
                            &scaleTarget, rawC, &m);
}


//____________________________________________________________________________

/// \brief Multiplies the transpose of A with B through Blas::Gemm.
///
/// Gemm is called with A flagged as transposed ('T') and B as not transposed
/// ('N'), forwarding the caller's alpha and beta. Under the usual BLAS gemm
/// convention this evaluates C = alpha * A^T * B + beta * C.
///
/// The shapes are verified with R__ASSERT: C must have as many rows as A has
/// columns and as many columns as B, and B must have as many rows as A.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param C Matrix receiving the result.
/// \param A Matrix that is used transposed.
/// \param B Right factor.
/// \param alpha Scale factor handed to Gemm for the product term.
/// \param beta Scale factor handed to Gemm for the existing content of C.
template<typename Real_t>
void TCpu<Real_t>::TransposeMultiply(TCpuMatrix<Real_t> &C,
                                     const TCpuMatrix<Real_t> &A,
                                     const TCpuMatrix<Real_t> &B,
                                     Real_t alpha, Real_t beta)
{
    // A is stored as k x m, so A^T is m x k and the result is m x n.
    const int m = static_cast<int>(A.GetNcols());
    const int k = static_cast<int>(A.GetNrows());
    const int n = static_cast<int>(B.GetNcols());

    R__ASSERT((int) C.GetNrows() == m);
    R__ASSERT((int) C.GetNcols() == n);
    R__ASSERT((int) B.GetNrows() == k);

    const char transposeA = 'T';
    const char keepB      = 'N';

    const Real_t *rawA = A.GetRawDataPointer();
    const Real_t *rawB = B.GetRawDataPointer();
    Real_t       *rawC = C.GetRawDataPointer();

    // Both A and B have k rows in storage, hence k as their leading dimension.
    ::TMVA::DNN::Blas::Gemm(&transposeA, &keepB, &m, &n, &k, &alpha,
                            rawA, &k,
                            rawB, &k,
                            &beta, rawC, &m);
}


//____________________________________________________________________________

/// \brief Multiplies B element-wise by A, updating B in place.
///
/// Both matrices must hold the same number of elements (checked with
/// R__ASSERT). The element range is cut into chunks of
/// TCpuMatrix::GetNWorkItems(nElements) elements. When one chunk covers
/// everything it is processed directly; otherwise the chunks are handed to
/// B's thread executor if DL_USE_MTE is defined, or processed one after the
/// other if it is not.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param B Matrix that is multiplied in place.
/// \param A Matrix providing the factors.
template<typename Real_t>
void TCpu<Real_t>::Hadamard(TCpuMatrix<Real_t> &B,
                            const TCpuMatrix<Real_t> &A)
{
   const Real_t *dataA = A.GetRawDataPointer();
   Real_t       *dataB = B.GetRawDataPointer();

   size_t nElements = A.GetNoElements();
   R__ASSERT(B.GetNoElements() == nElements);
   size_t nSteps = TCpuMatrix<Real_t>::GetNWorkItems(nElements);

   // Handles the chunk starting at `first`: at most nSteps elements, clipped
   // to the end of the data.
   auto multiplyChunk = [&](UInt_t first)
   {
      const size_t begin = first;
      const size_t end   = (begin + nSteps < nElements) ? begin + nSteps : nElements;
      for (size_t idx = begin; idx < end; ++idx) {
         dataB[idx] *= dataA[idx];
      }
      return 0;
   };

   // A single chunk already spans all elements: no need to dispatch.
   if (nSteps >= nElements) {
      multiplyChunk(0);
      return;
   }

#ifdef DL_USE_MTE
   B.GetThreadExecutor().Foreach(multiplyChunk, ROOT::TSeqI(0,nElements,nSteps));
#else
   for (size_t first = 0; first < nElements; first += nSteps)
      multiplyChunk(first);
#endif
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// \brief Compares two matrices element by element within a tolerance.
/// \tparam Real_t An architecture-specific floating point number type.
/// \param A The first matrix.
/// \param B The second matrix.
/// \param epsilon Largest absolute difference between corresponding elements that still counts as equal.
/// \return false as soon as one pair of elements differs by more than epsilon, true otherwise.
///
/// Fatal() is invoked when the row or column counts of A and B differ.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<typename Real_t>
bool TCpu<Real_t>::AlmostEquals(const TCpuMatrix<Real_t> &A, const TCpuMatrix<Real_t> &B, double epsilon)
{
    const bool sameShape = A.GetNrows() == B.GetNrows() && A.GetNcols() == B.GetNcols();
    if (!sameShape) {
        Fatal("AlmostEquals", "The passed matrices have unequal shapes.");
    }

    const Real_t *dataA = A.GetRawDataPointer();
    const Real_t *dataB = B.GetRawDataPointer();
    const size_t nElements = A.GetNoElements();

    // Stop at the first pair that lies outside the tolerance.
    size_t i = 0;
    while (i < nElements) {
        if (fabs(dataA[i] - dataB[i]) > epsilon) {
            return false;
        }
        ++i;
    }
    return true;
}


//____________________________________________________________________________

/// \brief Accumulates the column sums of A into B through Blas::Gemv.
///
/// Gemv is called in transposed mode ('T') on the m x n matrix A, with the
/// buffer returned by TCpuMatrix::GetOnePointer() as input vector
/// (presumably a vector of ones -- confirm in CpuMatrix). Under the usual
/// BLAS gemv convention this evaluates B = alpha * A^T * x + beta * B, which
/// yields one value per column of A when x is all ones.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param B Buffer receiving the result; no size check is performed here.
/// \param A Matrix whose columns are summed.
/// \param alpha Scale factor handed to Gemv for the matrix-vector term.
/// \param beta Scale factor handed to Gemv for the existing content of B.
template<typename Real_t>
void TCpu<Real_t>::SumColumns(TCpuMatrix<Real_t> &B,
                              const TCpuMatrix<Real_t> &A,
                              Real_t alpha, Real_t beta)
{
   const int rows   = static_cast<int>(A.GetNrows());
   const int cols   = static_cast<int>(A.GetNcols());
   const int stride = 1;

   const char transposeA = 'T';

   const Real_t *rawA = A.GetRawDataPointer();
   Real_t       *rawB = B.GetRawDataPointer();

   ::TMVA::DNN::Blas::Gemv(&transposeA, &rows, &cols, &alpha, rawA, &rows,
                           TCpuMatrix<Real_t>::GetOnePointer(), &stride,
                           &beta, rawB, &stride);
}


//____________________________________________________________________________

/// \brief Adds alpha times A to B element-wise, in place, through Blas::Axpy.
///
/// The number of elements processed is rows * columns of A; both buffers are
/// traversed with unit stride. The size of B is not checked here.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param B Matrix that is updated.
/// \param A Matrix that is scaled and added.
/// \param alpha Scale factor applied to the elements of A.
template<typename Real_t>
void TCpu<Real_t>::ScaleAdd(TCpuMatrix<Real_t> &B,
                            const TCpuMatrix<Real_t> &A,
                            Real_t alpha)
{
   const int count  = static_cast<int>(A.GetNcols() * A.GetNrows());
   const int stride = 1;

   const Real_t *source = A.GetRawDataPointer();
   Real_t       *target = B.GetRawDataPointer();

   ::TMVA::DNN::Blas::Axpy(&count, &alpha, source, &stride, target, &stride);
}


//____________________________________________________________________________

/// \brief Copies the elements of A into B.
///
/// Implemented as TCpuMatrix::MapFrom with the identity function.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param B Destination matrix.
/// \param A Source matrix.
template<typename Real_t>
void TCpu<Real_t>::Copy(TCpuMatrix<Real_t> &B,
                        const TCpuMatrix<Real_t> &A)
{
   // Kept as a named object because it is handed to MapFrom as an lvalue.
   auto identity = [](Real_t value) {
      return value;
   };
   B.MapFrom(identity, A);
}


//____________________________________________________________________________

/// \brief Applies the matrix ScaleAdd to every pair (B[i], A[i]).
///
/// For each index i of B, alpha times A[i] is added to B[i] in place.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param B Matrices that are updated.
/// \param A Matrices that are scaled and added; must hold at least as many
///          entries as B (checked with R__ASSERT).
/// \param alpha Scale factor applied to the elements of each A[i].
template<typename Real_t>
void TCpu<Real_t>::ScaleAdd(std::vector<TCpuMatrix<Real_t>> &B,
                            const std::vector<TCpuMatrix<Real_t>> &A,
                            Real_t alpha)
{
   // A[i] is read for every index of B; a shorter A would be indexed out of
   // bounds, so fail loudly instead.
   R__ASSERT(A.size() >= B.size());

   for (size_t i = 0; i < B.size(); ++i) {
      ScaleAdd(B[i], A[i], alpha);
   }
}


//____________________________________________________________________________

/// \brief Applies the matrix Copy to every pair (B[i], A[i]).
///
/// For each index i of B, the elements of A[i] are copied into B[i].
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param B Destination matrices.
/// \param A Source matrices; must hold at least as many entries as B
///          (checked with R__ASSERT).
template<typename Real_t>
void TCpu<Real_t>::Copy(std::vector<TCpuMatrix<Real_t>> &B,
                            const std::vector<TCpuMatrix<Real_t>> &A)
{
   // A[i] is read for every index of B; a shorter A would be indexed out of
   // bounds, so fail loudly instead.
   R__ASSERT(A.size() >= B.size());

   for (size_t i = 0; i < B.size(); ++i) {
      Copy(B[i], A[i]);
   }
}


//____________________________________________________________________________

/// \brief Adds the constant beta to every element of A, in place.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param A Matrix that is updated through TCpuMatrix::Map.
/// \param beta Value added to each element.
template <typename Real_t>
void TCpu<Real_t>::ConstAdd(TCpuMatrix<Real_t> &A, Real_t beta)
{
   auto addBeta = [beta](Real_t value) {
      return value + beta;
   };
   A.Map(addBeta);
}


//____________________________________________________________________________

/// \brief Multiplies every element of A by the constant beta, in place.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param A Matrix that is updated through TCpuMatrix::Map.
/// \param beta Factor applied to each element.
template <typename Real_t>
void TCpu<Real_t>::ConstMult(TCpuMatrix<Real_t> &A, Real_t beta)
{
   auto scaleByBeta = [beta](Real_t value) {
      return value * beta;
   };
   A.Map(scaleByBeta);
}


//____________________________________________________________________________

/// \brief Replaces every element x of A by 1 / x, in place.
///
/// No special handling of zero elements is done here.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param A Matrix that is updated through TCpuMatrix::Map.
template <typename Real_t>
void TCpu<Real_t>::ReciprocalElementWise(TCpuMatrix<Real_t> &A)
{
   auto reciprocal = [](Real_t value) {
      return 1.0 / value;
   };
   A.Map(reciprocal);
}


//____________________________________________________________________________

/// \brief Replaces every element x of A by x * x, in place.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param A Matrix that is updated through TCpuMatrix::Map.
template <typename Real_t>
void TCpu<Real_t>::SquareElementWise(TCpuMatrix<Real_t> &A)
{
   auto square = [](Real_t value) {
      return value * value;
   };
   A.Map(square);
}


//____________________________________________________________________________

/// \brief Replaces every element x of A by sqrt(x), in place.
///
/// No special handling of negative elements is done here.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param A Matrix that is updated through TCpuMatrix::Map.
template <typename Real_t>
void TCpu<Real_t>::SqrtElementWise(TCpuMatrix<Real_t> &A)
{
   auto squareRoot = [](Real_t value) {
      return sqrt(value);
   };
   A.Map(squareRoot);
}


/// Adam updates

//____________________________________________________________________________

/// \brief ADAM weight update, applied to A in place.
///
/// For every element: A = A - alpha * M / (sqrt(V) + eps).
/// The loop runs over the element count of A; the sizes of M and V are not
/// checked here.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param A Weights that are updated.
/// \param M First-moment values, read element-wise.
/// \param V Second-moment values, read element-wise.
/// \param alpha Factor applied to the update term.
/// \param eps Constant added to sqrt(V) in the denominator.
template<typename Real_t>
void TCpu<Real_t>::AdamUpdate(TCpuMatrix<Real_t> &A, const TCpuMatrix<Real_t> & M, const TCpuMatrix<Real_t> & V, Real_t alpha, Real_t eps)
{
   Real_t       *weights   = A.GetRawDataPointer();
   const Real_t *firstMom  = M.GetRawDataPointer();
   const Real_t *secondMom = V.GetRawDataPointer();

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      weights[i] -= alpha * firstMom[i] / (sqrt(secondMom[i]) + eps);
   }
}


//____________________________________________________________________________

/// \brief ADAM first-moment update, applied to A in place.
///
/// For every element: A = beta * A + (1 - beta) * B.
/// The loop runs over the element count of A; the size of B is not checked
/// here.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param A First-moment values that are updated.
/// \param B Gradient values, read element-wise.
/// \param beta Weight given to the previous moment value.
template<typename Real_t>
void TCpu<Real_t>::AdamUpdateFirstMom(TCpuMatrix<Real_t> &A, const TCpuMatrix<Real_t> & B, Real_t beta)
{
   Real_t       *moment   = A.GetRawDataPointer();
   const Real_t *gradient = B.GetRawDataPointer();

   // Weight of the new gradient; constant across the loop.
   const auto gradientWeight = 1. - beta;

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      moment[i] = beta * moment[i] + gradientWeight * gradient[i];
   }
}

//____________________________________________________________________________

/// \brief ADAM second-moment update, applied to A in place.
///
/// For every element: A = beta * A + (1 - beta) * B^2.
/// The loop runs over the element count of A; the size of B is not checked
/// here.
///
/// \tparam Real_t Floating point type of the matrix elements.
/// \param A Second-moment values that are updated.
/// \param B Gradient values, read element-wise.
/// \param beta Weight given to the previous moment value.
template<typename Real_t>
void TCpu<Real_t>::AdamUpdateSecondMom(TCpuMatrix<Real_t> &A, const TCpuMatrix<Real_t> & B, Real_t beta)
{
   Real_t       *moment   = A.GetRawDataPointer();
   const Real_t *gradient = B.GetRawDataPointer();

   // Weight of the squared gradient; constant across the loop.
   const auto gradientWeight = 1. - beta;

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      moment[i] = beta * moment[i] + gradientWeight * gradient[i] * gradient[i];
   }
}


} // DNN

} // TMVA

Blas.h

Cpu.h

v
SVector< double, 2 > v
Definition: Dict.h:5

b
#define b(i)
Definition: RSha256.hxx:100

f
#define f(i)
Definition: RSha256.hxx:104

Real_t
float Real_t
Definition: RtypesCore.h:64

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:96

Fatal
void Fatal(const char *location, const char *msgfmt,...)

sqrt
double sqrt(double)

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:89

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:152

TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:180

TMVA::DNN::TCpu::Hadamard
static void Hadamard(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
Definition: Arithmetic.cxx:91

TMVA::DNN::TCpu::Copy
static void Copy(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)

TMVA::DNN::TCpu::AdamUpdateSecondMom
static void AdamUpdateSecondMom(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta)
Definition: Arithmetic.cxx:286

TMVA::DNN::TCpu::TransposeMultiply
static void TransposeMultiply(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A^T (transposed) and B with the result being written into C.
Definition: Arithmetic.cxx:62

TMVA::DNN::TCpu::AdamUpdateFirstMom
static void AdamUpdateFirstMom(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta)
Definition: Arithmetic.cxx:274

TMVA::DNN::TCpu::AdamUpdate
static void AdamUpdate(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &M, const TCpuMatrix< Scalar_t > &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
Definition: Arithmetic.cxx:260

TMVA::DNN::TCpu::ConstAdd
static void ConstAdd(TCpuMatrix< Scalar_t > &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.cxx:219

TMVA::DNN::TCpu::SqrtElementWise
static void SqrtElementWise(TCpuMatrix< Scalar_t > &A)
Square root each element of the matrix A and write the result into A.
Definition: Arithmetic.cxx:251

TMVA::DNN::TCpu::Multiply
static void Multiply(TCpuMatrix< Scalar_t > &C, const TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
Standard multiplication of two matrices A and B with the result being written into C.
Definition: Arithmetic.cxx:34

TMVA::DNN::TCpu::AlmostEquals
static bool AlmostEquals(const TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
Definition: Arithmetic.cxx:133

TMVA::DNN::TCpu::SumColumns
static void SumColumns(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first n elements of B.
Definition: Arithmetic.cxx:151

TMVA::DNN::TCpu::ReciprocalElementWise
static void ReciprocalElementWise(TCpuMatrix< Scalar_t > &A)
Reciprocal each element of the matrix A and write the result into A.
Definition: Arithmetic.cxx:235

TMVA::DNN::TCpu::ScaleAdd
static void ScaleAdd(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.

TMVA::DNN::TCpu::ConstMult
static void ConstMult(TCpuMatrix< Scalar_t > &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.cxx:227

TMVA::DNN::TCpu::SquareElementWise
static void SquareElementWise(TCpuMatrix< Scalar_t > &A)
Square each element of the matrix A and write the result into A.
Definition: Arithmetic.cxx:243

ROOT::Math::beta
double beta(double x, double y)
Calculates the beta function.
Definition: SpecFuncMathCore.cxx:111

y
Double_t y[n]
Definition: legend1.C:17

x
Double_t x[n]
Definition: legend1.C:17

n
const Int_t n
Definition: legend1.C:16

ROOT::Math::Cephes::B
static double B[]
Definition: SpecFuncCephes.cxx:178

ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170

ROOT::Math::Cephes::C
static double C[]
Definition: SpecFuncCephes.cxx:187

ROOT::Math::GSLSimAn::Copy
void Copy(void *source, void *dest)
Definition: GSLSimAnnealing.cxx:149

ROOT::Math::fabs
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Definition: UnaryOperators.h:131

TMVA::DNN::Blas::Gemv
void Gemv(const char *trans, const int *m, const int *n, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *x, const int *incx, const Real_t *beta, Real_t *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.

TMVA::DNN::Blas::Axpy
void Axpy(const int *n, const Real_t *alpha, const Real_t *x, const int *incx, Real_t *y, const int *incy)
Add the vector x scaled by alpha to y.

TMVA::DNN::Blas::Gemm
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.

TMVA
Abstract ClassifierFactory template that handles arbitrary types.
Definition: GeneticMinimizer.h:21

m
auto * m
Definition: textangle.C:8

a
auto * a
Definition: textangle.C:12

epsilon
REAL epsilon
Definition: triangle.c:617