doc/v616/Cpu_2Regularization_8cxx_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 21/07/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


///////////////////////////////////////////////////////////////////////

// Implementation of the regularization functionals and gradients    //

// for the multi-threaded CPU implementation using ROOT's TThreadExecutor. //

///////////////////////////////////////////////////////////////////////


#include "TMVA/DNN/Architectures/Reference.h"


namespace TMVA

{

namespace DNN

{


//______________________________________________________________________________

/// Compute the L1 regularization term of a weight matrix: the sum of the
/// absolute values of all of its elements.
///
/// The raw element array is split into chunks of `nSteps` consecutive
/// entries, where `nSteps` is obtained from TCpuMatrix::GetNWorkItems. Each
/// chunk is summed by one work item dispatched through the matrix's thread
/// executor, and the per-chunk partial sums are then reduced to one value.
///
/// \param Weights  matrix whose elements are summed.
/// \return the sum of |w| over all elements; zero if the matrix has no elements.
template<typename AFloat>
AFloat TCpu<AFloat>::L1Regularization(const TCpuMatrix<AFloat> &Weights)
{
   const size_t nElements = Weights.GetNoElements();

   // Nothing to sum. Returning here also keeps the chunk arithmetic below from
   // dividing by nSteps in case GetNWorkItems reports a chunk size of zero for
   // an empty matrix (NOTE(review): its behaviour for 0 is not visible here).
   if (nElements == 0) {
      return AFloat{};
   }

   const AFloat *data = Weights.GetRawDataPointer();
   const size_t nSteps = TCpuMatrix<AFloat>::GetNWorkItems(nElements);

   // One zero-initialised slot per chunk; the "+ 1" covers a trailing partial chunk.
   std::vector<AFloat> temp(nElements/nSteps + 1);

   // workerID is the index of the first element of the chunk to process.
   auto f = [data, &temp, nElements, nSteps](UInt_t workerID)
   {
      const size_t iMax = std::min(workerID+nSteps, nElements);

      // Sum into a local first: work items start at distinct multiples of
      // nSteps, so each slot of temp is written by exactly one of them and a
      // single store at the end is sufficient.
      AFloat partial{};
      for (size_t i = workerID; i < iMax; ++i) {
         partial += fabs(data[i]);
      }
      temp[workerID/nSteps] = partial;
   };

   // Collapses a vector of partial sums into their total.
   auto reduction = [](const std::vector<AFloat> & v )
   {
      return std::accumulate(v.begin(),v.end(),AFloat{});
   };

   Weights.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps) );
   return Weights.GetThreadExecutor().Reduce(temp, reduction);
}


//______________________________________________________________________________

/// Add the gradient of the L1 regularization term to B, element by element:
/// B[i] += weightDecay * s, where s is -1 when A[i] is negative and +1
/// otherwise (an element equal to zero therefore contributes +weightDecay).
///
/// The element range is processed in chunks whose length comes from
/// TCpuMatrix::GetNWorkItems. If one chunk covers everything it is handled by
/// a single direct call. Otherwise the chunks go through B's thread executor
/// when DL_USE_MTE is defined, and through a plain sequential loop when not.
///
/// \param B            matrix updated in place.
/// \param A            matrix whose element signs are read; it must hold as
///                     many elements as B (checked with R__ASSERT).
/// \param weightDecay  magnitude added to or subtracted from each element of B.
template<typename AFloat>
void TCpu<AFloat>::AddL1RegularizationGradients(
    TCpuMatrix<AFloat> & B,
    const TCpuMatrix<AFloat> & A,
    AFloat weightDecay)
{
   AFloat       *target = B.GetRawDataPointer();
   const AFloat *source = A.GetRawDataPointer();

   const size_t total = B.GetNoElements();
   R__ASSERT(A.GetNoElements() == total);
   const size_t chunkLength = TCpuMatrix<AFloat>::GetNWorkItems(total);

   // Handles the chunk whose first element has index `first`.
   auto applyChunk = [source, target, weightDecay, total, chunkLength](UInt_t first)
   {
      const size_t last = std::min(first + chunkLength, total);
      size_t k = first;
      while (k < last) {
         const AFloat direction = (source[k] < 0.0) ? -1.0 : 1.0;
         target[k] += weightDecay * direction;
         ++k;
      }
      // The value returned is ignored by the direct calls below.
      return 0;
   };

   // A single chunk spans the whole matrix: no need to split the work.
   if (!(chunkLength < total)) {
      applyChunk(0);
      return;
   }

#ifdef DL_USE_MTE
   B.GetThreadExecutor().Foreach(applyChunk, ROOT::TSeqI(0, total, chunkLength));
#else
   for (size_t first = 0; first < total; first += chunkLength) {
      applyChunk(first);
   }
#endif
}


//______________________________________________________________________________

/// Compute the L2 regularization term of a weight matrix: the sum of the
/// squares of all of its elements.
///
/// The raw element array is split into chunks of `nSteps` consecutive
/// entries, where `nSteps` is obtained from TCpuMatrix::GetNWorkItems. Each
/// chunk is summed by one work item dispatched through the matrix's thread
/// executor, and the per-chunk partial sums are then reduced to one value.
///
/// \param Weights  matrix whose elements are squared and summed.
/// \return the sum of w*w over all elements; zero if the matrix has no elements.
template<typename AFloat>
AFloat TCpu<AFloat>::L2Regularization(const TCpuMatrix<AFloat> &Weights)
{
   const size_t nElements = Weights.GetNoElements();

   // Nothing to sum. Returning here also keeps the chunk arithmetic below from
   // dividing by nSteps in case GetNWorkItems reports a chunk size of zero for
   // an empty matrix (NOTE(review): its behaviour for 0 is not visible here).
   if (nElements == 0) {
      return AFloat{};
   }

   const AFloat *data = Weights.GetRawDataPointer();
   const size_t nSteps = TCpuMatrix<AFloat>::GetNWorkItems(nElements);

   // One zero-initialised slot per chunk; the "+ 1" covers a trailing partial chunk.
   std::vector<AFloat> temp(nElements/nSteps + 1);

   // workerID is the index of the first element of the chunk to process.
   auto f = [data, &temp, nElements, nSteps](UInt_t workerID)
   {
      const size_t iMax = std::min(workerID+nSteps, nElements);

      // Sum into a local first: work items start at distinct multiples of
      // nSteps, so each slot of temp is written by exactly one of them and a
      // single store at the end is sufficient.
      AFloat partial{};
      for (size_t i = workerID; i < iMax; ++i) {
         partial += data[i] * data[i];
      }
      temp[workerID/nSteps] = partial;
   };

   // Collapses a vector of partial sums into their total.
   auto reduction = [](const std::vector<AFloat> & v )
   {
      return std::accumulate(v.begin(),v.end(),AFloat{});
   };

   Weights.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps) );
   return Weights.GetThreadExecutor().Reduce(temp, reduction);
}


//______________________________________________________________________________

/// Add the gradient of the L2 regularization term to B, element by element:
/// B[i] += 2 * weightDecay * A[i].
///
/// The element range is processed in chunks whose length comes from
/// TCpuMatrix::GetNWorkItems. If one chunk covers everything it is handled by
/// a single direct call. Otherwise the chunks go through B's thread executor
/// when DL_USE_MTE is defined, and through a plain sequential loop when not.
///
/// \param B            matrix updated in place.
/// \param A            matrix whose elements are scaled and added; it must
///                     hold as many elements as B (checked with R__ASSERT).
/// \param weightDecay  regularization strength multiplying 2 * A[i].
template<typename AFloat>
void TCpu<AFloat>::AddL2RegularizationGradients(
    TCpuMatrix<AFloat> & B,
    const TCpuMatrix<AFloat> & A,
    AFloat weightDecay)
{
   AFloat       *target = B.GetRawDataPointer();
   const AFloat *source = A.GetRawDataPointer();

   const size_t total = B.GetNoElements();
   R__ASSERT(A.GetNoElements() == total);
   const size_t chunkLength = TCpuMatrix<AFloat>::GetNWorkItems(total);

   // Loop-invariant factor; computed with the literal 2.0 first so that the
   // product below is evaluated in the same order and precision as
   // 2.0 * weightDecay * A[i].
   const auto scale = 2.0 * weightDecay;

   // Handles the chunk whose first element has index `first`.
   auto applyChunk = [source, target, scale, total, chunkLength](UInt_t first)
   {
      const size_t last = std::min(first + chunkLength, total);
      size_t k = first;
      while (k < last) {
         target[k] += scale * source[k];
         ++k;
      }
      // The value returned is ignored by the direct calls below.
      return 0;
   };

   // A single chunk spans the whole matrix: no need to split the work.
   if (!(chunkLength < total)) {
      applyChunk(0);
      return;
   }

#ifdef DL_USE_MTE
   B.GetThreadExecutor().Foreach(applyChunk, ROOT::TSeqI(0, total, chunkLength));
#else
   for (size_t first = 0; first < total; first += chunkLength) {
      applyChunk(first);
   }
#endif
}


} // namespace DNN

} // namespace TMVA

v
SVector< double, 2 > v
Definition: Dict.h:5

f
#define f(i)
Definition: RSha256.hxx:104

Reference.h

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:96

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66

ROOT::TThreadExecutor::Reduce
auto Reduce(const std::vector< T > &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
"Reduce" an std::vector into a single object in parallel by passing a binary operator as the second a...
Definition: TThreadExecutor.hxx:442

ROOT::TThreadExecutor::Foreach
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
Definition: TThreadExecutor.hxx:118

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:89

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:152

TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:180

TMVA::DNN::TCpuMatrix::GetThreadExecutor
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:155

TMVA::DNN::TCpuMatrix::GetNoElements
size_t GetNoElements() const
Definition: CpuMatrix.h:144

TMVA::DNN::TCpu::AddL2RegularizationGradients
static void AddL2RegularizationGradients(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &W, Scalar_t weightDecay)
Definition: Regularization.cxx:131

TMVA::DNN::TCpu::L2Regularization
static Scalar_t L2Regularization(const TCpuMatrix< Scalar_t > &W)
Definition: Regularization.cxx:97

TMVA::DNN::TCpu::L1Regularization
static Scalar_t L1Regularization(const TCpuMatrix< Scalar_t > &W)
Definition: Regularization.cxx:26

TMVA::DNN::TCpu::AddL1RegularizationGradients
static void AddL1RegularizationGradients(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &W, Scalar_t weightDecay)
Definition: Regularization.cxx:59

ApplicationClassificationKeras.data
data
Definition: ApplicationClassificationKeras.py:17

ROOT::Math::Cephes::B
static double B[]
Definition: SpecFuncCephes.cxx:178

ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170

ROOT::Math::fabs
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Definition: UnaryOperators.h:131

TMVA::DNN::weightDecay
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496

TMVA
Abstract ClassifierFactory template that handles arbitrary types.
Definition: GeneticMinimizer.h:21