doc/v618/Cpu_2Propagation_8cxx_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 10/07/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


//////////////////////////////////////////////////////////////////////

// Implementation of the functions required for the forward and     //

// backward propagation of activations through a neural network for //

// the reference implementation.                                    //

//////////////////////////////////////////////////////////////////////


#include "TMVA/DNN/Architectures/Cpu.h"

#include "TMVA/DNN/Architectures/Cpu/Blas.h"


namespace TMVA {

namespace DNN {


template <typename AFloat>

void TCpu<AFloat>::MultiplyTranspose(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &input,

                                     const TCpuMatrix<AFloat> &Weights)

{


   int m = (int)input.GetNrows();

   int k = (int)input.GetNcols();

   int n = (int)Weights.GetNrows();


   if ((int)output.GetNrows() != m) {

      Error("MultiplyTranspose","Invalid input - output  rows  - input:  %d != output : %d",m, (int) output.GetNrows());

      R__ASSERT((int) output.GetNrows() == m);

   }

   if ((int)output.GetNcols() != n) {

      Error("MultiplyTranspose","Invalid output cols or weight  rows  - output cols:  %d != weight rows : %d",(int) output.GetNcols(),n);

      R__ASSERT((int) output.GetNcols() == n);

   }

   if ((int)Weights.GetNcols() != k) {

      Error("MultiplyTranspose","Invalid input cols or weight cols  - input cols:  %d != weight cols : %d", k, (int) Weights.GetNcols());

      R__ASSERT((int) Weights.GetNcols() == k);

   }


   char transa = 'N';

   char transb = 'T';


   AFloat alpha = 1.0;

   AFloat beta = 0.0;


   const AFloat *A = input.GetRawDataPointer();

   const AFloat *B = Weights.GetRawDataPointer();

   AFloat *C = output.GetRawDataPointer();


   ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha, A, &m, B, &n, &beta, C, &m);

}


template <typename AFloat>

void TCpu<AFloat>::AddRowWise(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &biases)

{

   int m = (int)output.GetNrows();

   int n = (int)output.GetNcols();


   int inc = 1.0;

   AFloat alpha = 1.0;


   AFloat *A = output.GetRawDataPointer();

   const AFloat *x = TCpuMatrix<AFloat>::GetOnePointer();

   const AFloat *y = biases.GetRawDataPointer();


   R__ASSERT(m <= (int)TCpuMatrix<AFloat>::GetOnePointerSize());

   R__ASSERT(n <= (int)(biases.GetNcols()*biases.GetNrows()));


   ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, x, &inc, y, &inc, A, &m);

}


template <typename AFloat>

void TCpu<AFloat>::Backward(TCpuMatrix<AFloat> &activationGradientsBackward, TCpuMatrix<AFloat> &weightGradients,

                            TCpuMatrix<AFloat> &biasGradients, TCpuMatrix<AFloat> &df,

                            const TCpuMatrix<AFloat> &activationGradients, const TCpuMatrix<AFloat> &weights,

                            const TCpuMatrix<AFloat> &activationsBackward)

{

   // Compute element-wise product.

   Hadamard(df, activationGradients);


   // Activation gradients.

   if (activationGradientsBackward.GetNoElements() > 0) Multiply(activationGradientsBackward, df, weights);


   // Weight gradients.

   if (weightGradients.GetNoElements() > 0) TransposeMultiply(weightGradients, df, activationsBackward);


   // Bias gradients.

   if (biasGradients.GetNoElements() > 0) SumColumns(biasGradients, df);

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::Im2col(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, size_t imgHeight, size_t imgWidth,

                          size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,

                          size_t zeroPaddingHeight, size_t zeroPaddingWidth)

{


   // image boudaries

   int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;

   int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;

   size_t currLocalView = 0;


   const int halfFltHeight =  fltHeight / 2;

   const int halfFltWidth =  fltWidth / 2;

   const int halfFltHeightM1 = (fltHeight - 1) / 2;

   const int halfFltWidthM1 = (fltWidth - 1) / 2;

   const int nRowsInput = B.GetNrows();

   const int nColsInput = B.GetNcols();

   const int nRowsOutput = A.GetNrows();

   const int nColsOutput = A.GetNcols();


   // convolution centers

   for (int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {

      for (int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {

         size_t currLocalViewPixel = 0;


         // within the local view

         R__ASSERT((int) currLocalView < nRowsOutput );


         for (int m = 0; m < nRowsInput; m++) {

            for (int k = i - halfFltHeight  ; k <= Int_t(i + halfFltHeightM1 ); k++) {

               int kstep = k * imgWidth;

               for (int l = j - halfFltWidth ; l <= Int_t(j + halfFltWidthM1); l++) {


                  // Check the boundaries

                  R__ASSERT((int) currLocalViewPixel < nColsOutput );

                  //R__ASSERT(k * imgWidth + l < B.GetNcols());

                  if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >=  nColsInput)

                     A(currLocalView, currLocalViewPixel++) = 0;

                  else

                     A(currLocalView, currLocalViewPixel++) = B(m, kstep + l);

               }

            }

         }

         //std::cout << " i " << i << "  " << j << " increment currLocalView " << currLocalView << std::endl;

         currLocalView++;

      }

   }

   //TMVA_DNN_PrintTCpuMatrix(A,"FromIm2Col");

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::Im2colIndices(std::vector<int> &V, const TCpuMatrix<AFloat> &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth,

                          size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,

                           size_t zeroPaddingHeight, size_t zeroPaddingWidth)

{


   // image boudaries

   int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;

   int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;

   size_t currLocalView = 0;


   const int halfFltHeight =  fltHeight / 2;

   const int halfFltWidth =  fltWidth / 2;

   const int halfFltHeightM1 = (fltHeight - 1) / 2;

   const int halfFltWidthM1 = (fltWidth - 1) / 2;

   const int nRowsInput = B.GetNrows();

   const int nColsInput = B.GetNcols();

   const size_t nSizeOutput = V.size();

   const int npixels =  nRowsInput * fltHeight * fltWidth;

   // const int nRowsOutput = A.GetNrows();

   // const int nColsOutput = A.GetNcols();


   // convolution centers

   for (int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {

      for (int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {

         size_t currLocalViewPixel = 0;


         // within the local view

         //R__ASSERT((int) currLocalView < nRowsOutput );


         for (int m = 0; m < nRowsInput; m++) {

            for (int k = i - halfFltHeight  ; k <= Int_t(i + halfFltHeightM1 ); k++) {

               int kstep = k * imgWidth;

               for (int l = j - halfFltWidth ; l <= Int_t(j + halfFltWidthM1); l++) {


                  // Check the boundaries

                  //R__ASSERT(currLocalViewPixel < nColsOutput );

                  R__ASSERT(currLocalView * npixels + currLocalViewPixel < nSizeOutput );

                  if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >=  nColsInput)

                     //V[currLocalView * npixels + currLocalViewPixel]=-1;

                     V[currLocalViewPixel * nLocalViews + currLocalView] = -1;

                  else

                     V[currLocalViewPixel * nLocalViews + currLocalView]= ( kstep + l) * nRowsInput + m;


                  currLocalViewPixel++;

               }

            }

         }

         currLocalView++;

      }

   }

}

template <typename AFloat>

void TCpu<AFloat>::Im2colFast(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, const std::vector<int> &V)

{

   size_t  n = V.size();

   R__ASSERT( n == A.GetNcols() * A.GetNrows() );

   AFloat *  a = A.GetRawDataPointer();

   const AFloat *  b = B.GetRawDataPointer();


//#define DL_USE_MTE

   // parallel execution

#ifdef DL_USE_MTE

   const size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(n);


   auto f = [&](UInt_t workerID)

   {

      for (size_t j = 0; j < nsteps; ++j) {

         size_t ii = workerID+j;

         if (ii >= n) break;

         int idx = V[ii];

         if (idx >= 0) a[ii] = b[idx];

         else a[ii] = 0;

      }

      return 0;

   };


   A.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,n,nsteps) );


#else

   //serial execution

   for (size_t ii = 0; ii < n; ++ii) {

      int idx = V[ii];

      if (idx >= 0) a[ii] = b[idx];

      else a[ii] = 0;

   }


#endif

   // TMVA_DNN_PrintTCpuMatrix(A,"FromFastIm2Col");

   // TMVA_DNN_PrintTCpuMatrix(B,"input to Im2Col");

   // std::cout << "V vector " << V.size() << std::endl;

   // for ( int i = 0; i < n; ++i) {

   //    std::cout << V[i] << "  ";

   // }

   // std::cout << std::endl;

}

//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::RotateWeights(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, size_t filterDepth,

                                 size_t filterHeight, size_t filterWidth, size_t numFilters)

{

   size_t jump = filterHeight * filterWidth;

   for (size_t j = 0; j < filterDepth; j++) {

      for (size_t k = 0; k < numFilters; k++) {

         for (size_t i = 0; i < jump; i++) {

            A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);

            //A(j, k * jump + i) = B(k, j * jump + i);

         }

      }

   }

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::AddConvBiases(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &biases)

{

   int m = (int)output.GetNrows();

   int n = (int)output.GetNcols();


   int inc = 1.0;

   AFloat alpha = 1.0;


   AFloat *A = output.GetRawDataPointer();

   const AFloat *x = biases.GetRawDataPointer();

   const AFloat *y = TCpuMatrix<AFloat>::GetOnePointer();


   R__ASSERT(m <= (int)biases.GetNoElements() );

   R__ASSERT(n <= (int)TCpuMatrix<AFloat>::GetOnePointerSize() );


   ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, x, &inc, y, &inc, A, &m);

}


template<typename AFloat>

size_t TCpu<AFloat>::calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)

{

   size_t temp = imgDim - fltDim + 2 * padding;

   if (temp % stride || temp + stride <= 0) {

      Fatal("calculateDimension", "Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride) "

            "%zu, %zu, %zu, %zu", imgDim, fltDim, padding, stride);

   }

   return temp / stride + 1;

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::ConvLayerForward(std::vector<TCpuMatrix<AFloat>> & output,

                                    std::vector<TCpuMatrix<AFloat>> & derivatives,

                                    const std::vector<TCpuMatrix<AFloat>> &input,

                                    const TCpuMatrix<AFloat> &weights, const TCpuMatrix<AFloat> & biases,

                                    const DNN::CNN::TConvParams & params, EActivationFunction activFunc,

                                    std::vector<TCpuMatrix<AFloat>> & /*  */)

{

   size_t height = calculateDimension(params.inputHeight, params.filterHeight, params.paddingHeight, params.strideRows);

   size_t width = calculateDimension(params.inputWidth, params.filterWidth, params.paddingWidth, params.strideCols);

   size_t nLocalViews = height * width;

   size_t nLocalViewPixels = params.inputDepth * params.filterHeight * params.filterWidth;


   R__ASSERT( input.size() > 0);

   std::vector<int> forwardIndices(nLocalViews * nLocalViewPixels);

   Im2colIndices(forwardIndices, input[0], nLocalViews, params.inputHeight, params.inputWidth, params.filterHeight,

                 params.filterWidth, params.strideRows, params.strideCols, params.paddingHeight, params.paddingWidth);


   //this should fix multi-thread inizializations of arrays

   TCpuMatrix<AFloat>::InitializeOneVector(nLocalViews);

   TCpuMatrix<AFloat>::InitializeOneVector(output[0].GetNcols());   // since it is used in AddCOnvBiases


   auto f = [&] (UInt_t i)

   {

       // dropout not yet implemented for CNN

       // if (applyDropout && (dropoutProbability != 1.0)) {

       //    Dropout(input[i], dropoutProbability);

       // }


       TCpuMatrix<AFloat> inputTr(nLocalViews, nLocalViewPixels);

       //inputTr.Zero();   // this is not thread safe


       Im2colFast(inputTr, input[i], forwardIndices);


       MultiplyTranspose(output[i], weights, inputTr);

       AddConvBiases(output[i], biases);


       evaluateDerivative<TCpu<AFloat>>(derivatives[i], activFunc, output[i]);

       evaluate<TCpu<AFloat>>(output[i], activFunc);


   };


   TCpuMatrix<AFloat>::GetThreadExecutor().Foreach(f, ROOT::TSeqI(input.size()));

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::ConvLayerBackward(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,

                                     TCpuMatrix<AFloat> &weightGradients, TCpuMatrix<AFloat> &biasGradients,

                                     std::vector<TCpuMatrix<AFloat>> &df,

                                     const std::vector<TCpuMatrix<AFloat>> &activationGradients,

                                     const TCpuMatrix<AFloat> &weights,

                                     const std::vector<TCpuMatrix<AFloat>> &activationsBackward, size_t batchSize,

                                     size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width,

                                     size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)

{

   // Update derivatives

   //    size_t m, n;

   //    m = activationGradients[0].GetNrows();

   //    n = activationGradients[0].GetNcols();


   for (size_t i = 0; i < batchSize; i++) {

      // Compute element-wise product.

      Hadamard(df[i], activationGradients[i]);

   }


   // Calculate the activation gradients of the previous layer

   CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth, depth,

                                    height, width, filterDepth, filterHeight, filterWidth);


   // Calculate the weight gradients

   CalculateConvWeightGradients(weightGradients, df, activationsBackward, batchSize, inputHeight, inputWidth, depth,

                                height, width, filterDepth, filterHeight, filterWidth, nLocalViews);


   // Calculate the bias gradients

   CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::CalculateConvActivationGradients(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,

                                                    const std::vector<TCpuMatrix<AFloat>> &df,

                                                    const TCpuMatrix<AFloat> &weights, size_t batchSize,

                                                    size_t inputHeight, size_t inputWidth, size_t depth, size_t height,

                                                    size_t width, size_t filterDepth, size_t filterHeight,

                                                    size_t filterWidth)

{

   if (activationGradientsBackward.size() == 0) return;


   for (size_t i = 0; i < activationGradientsBackward.size(); i++) {

      activationGradientsBackward[i].Zero();

   }


   // Transform the weights


   //TMVA_DNN_PrintTCpuMatrix(weights,"weights");

   // filter depth must be same as input depth

   TCpuMatrix<AFloat> rotWeights(filterDepth, depth * filterHeight * filterWidth);

   RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());

   //TMVA_DNN_PrintTCpuMatrix(rotWeights,"rot-weights");


   // Calculate the zero paddings

   size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));

   size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));


   // size_t tempZeroPaddingHeight = 1;

   // size_t tempZeroPaddingWidth = 1;


   // Calculate the number of local views and the number of pixles in each view

   size_t tempNLocalViews = inputHeight * inputWidth;

   size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;


   size_t tempStrideRows = 1;

   size_t tempStrideCols = 1;


   // An entire convolution follows


    std::vector<int> vIndices( tempNLocalViews * tempNLocalViewPixels );

    Im2colIndices(vIndices, df[0], tempNLocalViews, height, width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,

             tempZeroPaddingHeight, tempZeroPaddingWidth);


    //for (size_t i = 0; i < batchSize; i++) {

    R__ASSERT(batchSize == df.size() );

    R__ASSERT(batchSize == activationGradientsBackward.size() );

    auto f = [&] (UInt_t i)

   {


       // Im2col(dfTr, df[i], height, width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,

       //       tempZeroPaddingHeight, tempZeroPaddingWidth);


      TCpuMatrix<AFloat> dfTr(tempNLocalViews, tempNLocalViewPixels);


      Im2colFast(dfTr, df[i], vIndices);


       //TMVA_DNN_PrintTCpuMatrix(df[i],"df[i]");

       //TMVA_DNN_PrintTCpuMatrix(dfTr,"dfTr");


       MultiplyTranspose(activationGradientsBackward[i], rotWeights, dfTr);


       //TMVA_DNN_PrintTCpuMatrix(activationGradientsBackward[i],"activGrad-result");


   };


    TCpuMatrix<AFloat>::GetThreadExecutor().Foreach(f, ROOT::TSeqI( batchSize ) );

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::CalculateConvWeightGradients(TCpuMatrix<AFloat> &weightGradients,

                                                const std::vector<TCpuMatrix<AFloat>> &df,

                                                const std::vector<TCpuMatrix<AFloat>> &activationsBackward,

                                                size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,

                                                size_t height, size_t width, size_t filterDepth, size_t filterHeight,

                                                size_t filterWidth, size_t nLocalViews)

{

   // reinitialize the weight gradients to 0

   weightGradients.Zero();


   const size_t filterSize = filterHeight * filterWidth;

   const size_t nLocalViewPixels = filterDepth * filterHeight * filterWidth;

   R__ASSERT( weightGradients.GetNcols() == filterDepth * filterHeight * filterWidth);


   const size_t tempStrideRows = 1;

   const size_t tempStrideCols = 1;


      // Calculate the zero paddings from the input height and width (assume stride =1 )

   const size_t tempZeroPaddingHeight = (height - inputHeight + filterHeight - 1) / 2;

   const size_t tempZeroPaddingWidth = (width - inputWidth + filterWidth - 1) / 2;


   // convolution


   std::vector<int> vIndices(nLocalViews * nLocalViewPixels );

   Im2colIndices(vIndices, activationsBackward[0], nLocalViews, inputHeight, inputWidth, filterHeight , filterWidth,

             tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);


   //std::cout << "do back-propagation in conv layer - compute weight gradient" << std::endl;


   std::vector< TCpuMatrix<AFloat> > vres;//(batchSize);

   for (size_t i = 0; i < batchSize; i++) {

      vres.emplace_back(depth, nLocalViewPixels);

      //TMVA_DNN_PrintTCpuMatrix(df[i],"df");

      //TMVA_DNN_PrintTCpuMatrix(activationsBackward[i],"df");


   }


   auto fmap = [&](int i) {


      //TMVA_DNN_PrintTCpuMatrix(df[i],"df-i");

      TCpuMatrix<AFloat> xTr(nLocalViews, nLocalViewPixels);

      TCpuMatrix<AFloat> res(depth, nLocalViewPixels);


      //computing t he gradient is equivalent of doing a convolution of the input using as conv kernel the delta's (the df[] values)

      //N.B. only stride values=1 are now supported


      //xTr.Zero();

      // Im2col(xTr, const_cast<TCpuMatrix<AFloat> &>(activationsBackward[i]), inputHeight, inputWidth, filterHeight , filterWidth,

      //        tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);

      Im2colFast(xTr, activationsBackward[i], vIndices);


      //std::cout << "doing im2colfast" << std::endl;

      //TMVA_DNN_PrintTCpuMatrix(xTr,"xTr-i");

      //TMVA_DNN_PrintTCpuMatrix(activationsBackward[i],"actbackward-i");

      Multiply(vres[i], df[i], xTr);

      //TMVA_DNN_PrintTCpuMatrix(vres[i],"res_ofMT");


      return;

      //return res;

   };


   TCpuMatrix<AFloat>::GetThreadExecutor().Foreach(fmap, ROOT::TSeqI( batchSize ) );


//   auto freduce = [&](const std::vector<TCpuMatrix<AFloat>> & vres) {

      R__ASSERT(vres.size() == batchSize);

      for (size_t i = 0; i < batchSize; i++) {

         //TMVA_DNN_PrintTCpuMatrix(vres[i],"res");

         for (size_t j = 0; j < depth; j++) {

            for (size_t k = 0; k < filterDepth; k++) {

               size_t kOffset = k * filterSize;

               for (size_t l = 0; l < filterSize; l++) {

                  //weightGradients(j, k * (filterHeight * filterWidth) + l) += res(k, (tempNLocalViews - 1) - l);

                  weightGradients(j, kOffset + l) += vres[i](j,  kOffset + l);

               }

            }

         }

         // TMVA_DNN_PrintTCpuMatrix(weightGradients,"weights_i");

      }

      //  };


   //TCpuMatrix<AFloat>::GetThreadExecutor().MapReduce(fmap, ROOT::TSeqI( batchSize ) , freduce);

   //TMVA_DNN_PrintTCpuMatrix(weightGradients,"W-Grad");

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::CalculateConvBiasGradients(TCpuMatrix<AFloat> &biasGradients, const std::vector<TCpuMatrix<AFloat>> &df,

                                              size_t batchSize, size_t depth, size_t nLocalViews)

{

   biasGradients.Zero();

   for (size_t i = 0; i < depth; i++) {

      AFloat sum = 0;

      for (size_t j = 0; j < nLocalViews; j++) {

         for (size_t k = 0; k < batchSize; k++) {

            sum += df[k](i, j);

         }

      }

      biasGradients(i, 0) = sum;

   }

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::Downsample(TCpuMatrix<AFloat> &A, TCpuMatrix<AFloat> &B, const TCpuMatrix<AFloat> &C,

                              size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,

                              size_t strideCols)

{

   // image boudaries

   int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;

   int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;

   size_t currLocalView = 0;


   // centers

   for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {

      for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {

         // within local views

         for (int m = 0; m < (Int_t)C.GetNrows(); m++) {

            AFloat value = -std::numeric_limits<AFloat>::max();


            for (int k = i - fltHeight / 2; k <= Int_t(i + (fltHeight - 1) / 2); k++) {

               for (int l = j - fltWidth / 2; l <= Int_t(j + (fltWidth - 1) / 2); l++) {

                  if (C(m, k * imgWidth + l) > value) {

                     value = C(m, k * imgWidth + l);

                     B(m, currLocalView) = k * imgWidth + l;

                  }

               }

            }

            A(m, currLocalView) = value;

         }

         currLocalView++;

      }

   }

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::MaxPoolLayerBackward(TCpuMatrix<AFloat> &activationGradientsBackward,

                                        const TCpuMatrix<AFloat> &activationGradients,

                                        const TCpuMatrix<AFloat> &indexMatrix,

                                        size_t /* imgHeight */,

                                        size_t /* imgWidth */,

                                        size_t /* fltHeight */,

                                        size_t /* fltWidth */,

                                        size_t /* strideRows */,

                                        size_t /* strideCols */,

                                        size_t nLocalViews)

{

   size_t depth = activationGradientsBackward.GetNrows();


   for (size_t j = 0; j < depth; j++) {

      // initialize to zeros

      for (size_t t = 0; t < (size_t)activationGradientsBackward.GetNcols(); t++) {

         activationGradientsBackward(j, t) = 0;

      }


      // set values

      for (size_t k = 0; k < nLocalViews; k++) {

         AFloat grad = activationGradients(j, k);

         size_t winningIdx = indexMatrix(j, k);

         activationGradientsBackward(j, winningIdx) += grad;

      }

   }

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::Reshape(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B)

{

   size_t nColsA = A.GetNcols();

   size_t nColsB = B.GetNcols();


   for (size_t i = 0; i < A.GetNrows(); i++) {

      for (size_t j = 0; j < A.GetNcols(); j++) {

         size_t nElem = i * nColsA + j;

         A(i, j) = B(nElem / nColsB, nElem % nColsB);

      }

   }

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::Flatten(TCpuMatrix<AFloat> &A, const std::vector<TCpuMatrix<AFloat>> &B, size_t size, size_t nRows,

                           size_t nCols)

{

   for (size_t i = 0; i < (size_t)size; i++) {

      for (size_t j = 0; j < (size_t)nRows; j++) {

         for (size_t k = 0; k < (size_t)nCols; k++) {

            A(i, j * nCols + k) = B[i](j, k);

         }

      }

   }

}


//____________________________________________________________________________

template <typename AFloat>

void TCpu<AFloat>::Deflatten(std::vector<TCpuMatrix<AFloat>> &A, const TCpuMatrix<AFloat> &B, size_t size, size_t nRows,

                             size_t nCols)

{

   for (size_t i = 0; i < (size_t)size; i++) {

      for (size_t j = 0; j < (size_t)nRows; j++) {

         for (size_t k = 0; k < (size_t)nCols; k++) {

            A[i](j, k) = B(i, j * nCols + k);

         }

      }

   }

}


//______________________________________________________________________________

template <typename AReal>

void TCpu<AReal>::Rearrange(std::vector<TCpuMatrix<AReal>> &out, const std::vector<TCpuMatrix<AReal>> &in)

{

   // B x T x D out --- T x B x D in*/

   size_t B = out.size();

   size_t T = out[0].GetNrows();

   size_t D = out[0].GetNcols();

   if ((T != in.size()) || (B != in[0].GetNrows()) || (D != in[0].GetNcols())) {

      std::cout << "Incompatible Dimensions\n"

                << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"

                << D << "\n";

      return;

   }

   for (size_t i = 0; i < B; ++i) {

      for (size_t j = 0; j < T; ++j) {

         for (size_t k = 0; k < D; ++k) {

            out[i](j, k) = in[j](i, k);

         }

      }

   }

   return;

}


} // namespace DNN

} // namespace TMVA

Blas.h

Cpu.h

b
#define b(i)
Definition: RSha256.hxx:100

f
#define f(i)
Definition: RSha256.hxx:104

Int_t
int Int_t
Definition: RtypesCore.h:41

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

width
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121

R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:96

Error
void Error(const char *location, const char *msgfmt,...)

Fatal
void Fatal(const char *location, const char *msgfmt,...)

floor
double floor(double)

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:89

TMVA::DNN::TCpuMatrix::GetNcols
size_t GetNcols() const
Definition: CpuMatrix.h:143

TMVA::DNN::TCpuMatrix::Zero
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:262

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:152

TMVA::DNN::TCpuMatrix::GetOnePointer
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
Definition: CpuMatrix.h:102

TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:180

TMVA::DNN::TCpuMatrix::GetNrows
size_t GetNrows() const
Definition: CpuMatrix.h:142

TMVA::DNN::TCpuMatrix::InitializeOneVector
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:87

TMVA::DNN::TCpuMatrix::GetThreadExecutor
static Executor & GetThreadExecutor()
Definition: CpuMatrix.h:155

TMVA::DNN::TCpuMatrix::GetNoElements
size_t GetNoElements() const
Definition: CpuMatrix.h:144

TMVA::DNN::TCpu::CalculateConvActivationGradients
static void CalculateConvActivationGradients(std::vector< TCpuMatrix< Scalar_t > > &activationGradientsBackward, const std::vector< TCpuMatrix< Scalar_t > > &df, const TCpuMatrix< Scalar_t > &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
Definition: Propagation.cxx:374

TMVA::DNN::TCpu::Im2col
static void Im2col(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
Definition: Propagation.cxx:99

TMVA::DNN::TCpu::AddRowWise
static void AddRowWise(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.cxx:60

TMVA::DNN::TCpu::Im2colFast
static void Im2colFast(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, const std::vector< int > &V)
Definition: Propagation.cxx:202

TMVA::DNN::TCpu::Downsample
static void Downsample(TCpuMatrix< AReal > &A, TCpuMatrix< AReal > &B, const TCpuMatrix< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
Definition: Propagation.cxx:549

TMVA::DNN::TCpu::CalculateConvWeightGradients
static void CalculateConvWeightGradients(TCpuMatrix< Scalar_t > &weightGradients, const std::vector< TCpuMatrix< Scalar_t > > &df, const std::vector< TCpuMatrix< Scalar_t > > &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
Definition: Propagation.cxx:443

TMVA::DNN::TCpu::MaxPoolLayerBackward
static void MaxPoolLayerBackward(TCpuMatrix< AReal > &activationGradientsBackward, const TCpuMatrix< AReal > &activationGradients, const TCpuMatrix< AReal > &indexMatrix, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
Definition: Propagation.cxx:582

TMVA::DNN::TCpu::Flatten
static void Flatten(TCpuMatrix< AReal > &A, const std::vector< TCpuMatrix< AReal > > &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A.
Definition: Propagation.cxx:627

TMVA::DNN::TCpu::Reshape
static void Reshape(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
Definition: Propagation.cxx:612

TMVA::DNN::TCpu::ConvLayerForward
static void ConvLayerForward(std::vector< TCpuMatrix< Scalar_t > > &output, std::vector< TCpuMatrix< Scalar_t > > &derivatives, const std::vector< TCpuMatrix< Scalar_t > > &input, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, std::vector< TCpuMatrix< Scalar_t > > &)
Forward propagation in the Convolutional layer.
Definition: Propagation.cxx:294

TMVA::DNN::TCpu::Rearrange
static void Rearrange(std::vector< TCpuMatrix< AReal > > &out, const std::vector< TCpuMatrix< AReal > > &in)
Rearrage data accoring to time fill B x T x D out with T x B x D matrix in.
Definition: Propagation.cxx:655

TMVA::DNN::TCpu::RotateWeights
static void RotateWeights(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
Definition: Propagation.cxx:247

TMVA::DNN::TCpu::MultiplyTranspose
static void MultiplyTranspose(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &weights)
Matrix-multiply input with the transpose of \pweights and write the results into output.
Definition: Propagation.cxx:25

TMVA::DNN::TCpu::calculateDimension
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
Definition: Propagation.cxx:282

TMVA::DNN::TCpu::CalculateConvBiasGradients
static void CalculateConvBiasGradients(TCpuMatrix< Scalar_t > &biasGradients, const std::vector< TCpuMatrix< Scalar_t > > &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
Definition: Propagation.cxx:532

TMVA::DNN::TCpu::ConvLayerBackward
static void ConvLayerBackward(std::vector< TCpuMatrix< Scalar_t > > &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, std::vector< TCpuMatrix< Scalar_t > > &df, const std::vector< TCpuMatrix< Scalar_t > > &activationGradients, const TCpuMatrix< Scalar_t > &weights, const std::vector< TCpuMatrix< Scalar_t > > &activationBackward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
Definition: Propagation.cxx:341

TMVA::DNN::TCpu::Backward
static void Backward(TCpuMatrix< Scalar_t > &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, TCpuMatrix< Scalar_t > &df, const TCpuMatrix< Scalar_t > &activationGradients, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
Definition: Propagation.cxx:79

TMVA::DNN::TCpu::AddConvBiases
static void AddConvBiases(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the biases in the Convolutional Layer.
Definition: Propagation.cxx:263

TMVA::DNN::TCpu::Im2colIndices
static void Im2colIndices(std::vector< int > &V, const TCpuMatrix< AReal > &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Definition: Propagation.cxx:150

TMVA::DNN::TCpu::Deflatten
static void Deflatten(std::vector< TCpuMatrix< AReal > > &A, const TCpuMatrix< AReal > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor B.
Definition: Propagation.cxx:641

TMVA::Executor::Foreach
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:110

ROOT::Math::beta
double beta(double x, double y)
Calculates the beta function.
Definition: SpecFuncMathCore.cxx:111

y
Double_t y[n]
Definition: legend1.C:17

x
Double_t x[n]
Definition: legend1.C:17

n
const Int_t n
Definition: legend1.C:16

ROOT::Math::Cephes::B
static double B[]
Definition: SpecFuncCephes.cxx:178

ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170

ROOT::Math::Cephes::C
static double C[]
Definition: SpecFuncCephes.cxx:187

ROOT::Math::Chebyshev::T
double T(double x)
Definition: ChebyshevPol.h:34

TMVA::DNN::Blas::Ger
void Ger(const int *m, const int *n, const Real_t *alpha, const Real_t *x, const int *incx, const Real_t *y, const int *incy, Real_t *A, const int *lda)
Add the outer product of x and y to the matrix A.

TMVA::DNN::Blas::Gemm
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.

TMVA::DNN::EActivationFunction
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32

TMVA
create variable transformations
Definition: GeneticMinimizer.h:21

TMVA::DNN::CNN::TConvParams
Definition: ConvLayer.h:155

TMVA::DNN::CNN::TConvParams::strideRows
size_t strideRows
The number of row pixels to slid the filter each step.
Definition: ConvLayer.h:168

TMVA::DNN::CNN::TConvParams::filterHeight
size_t filterHeight
The height of the filter.
Definition: ConvLayer.h:165

TMVA::DNN::CNN::TConvParams::inputHeight
size_t inputHeight
The height of the previous layer or input.
Definition: ConvLayer.h:161

TMVA::DNN::CNN::TConvParams::paddingWidth
size_t paddingWidth
The number of zero layers left and right of the input.
Definition: ConvLayer.h:171

TMVA::DNN::CNN::TConvParams::filterWidth
size_t filterWidth
The width of the filter.
Definition: ConvLayer.h:166

TMVA::DNN::CNN::TConvParams::paddingHeight
size_t paddingHeight
The number of zero layers added top and bottom of the input.
Definition: ConvLayer.h:170

TMVA::DNN::CNN::TConvParams::inputWidth
size_t inputWidth
The width of the previous layer or input.
Definition: ConvLayer.h:162

TMVA::DNN::CNN::TConvParams::inputDepth
size_t inputDepth
The depth of the previous layer or input.
Definition: ConvLayer.h:160

TMVA::DNN::CNN::TConvParams::strideCols
size_t strideCols
The number of column pixels to slid the filter each step.
Definition: ConvLayer.h:169

m
auto * m
Definition: textangle.C:8

l
auto * l
Definition: textangle.C:4

a
auto * a
Definition: textangle.C:12

sum
static long int sum(long int i)
Definition: Factory.cxx:2258

output
static void output(int code)
Definition: gifencode.c:226