template <typename AFloat>
void TCpu<AFloat>::MultiplyTranspose(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &input,
                                     const TCpuMatrix<AFloat> &Weights)
{
   int m = (int)input.GetNrows();
   int k = (int)input.GetNcols();
   int n = (int)Weights.GetNrows();

   // Consistency checks between the input, weight and output shapes.
   if ((int)output.GetNrows() != m) {
      Error("MultiplyTranspose", "Invalid input - output rows - input: %d != output : %d", m, (int)output.GetNrows());
   }
   if ((int)output.GetNcols() != n) {
      Error("MultiplyTranspose", "Invalid output cols or weight rows - output cols: %d != weight rows : %d",
            (int)output.GetNcols(), n);
   }
   if ((int)Weights.GetNcols() != k) {
      Error("MultiplyTranspose", "Invalid input cols or weight cols - input cols: %d != weight cols : %d",
            k, (int)Weights.GetNcols());
   }

   char transa = 'N';
   char transb = 'T';
   AFloat alpha = 1.0;
   AFloat beta = 0.0;

   const AFloat *A = input.GetRawDataPointer();
   const AFloat *B = Weights.GetRawDataPointer();
   AFloat *C = output.GetRawDataPointer();

   // output = input * Weights^T, computed as a column-major GEMM on the raw buffers.
   ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha, A, &m, B, &n, &beta, C, &m);
}
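The Gemm call above multiplies the input with the transpose of the weights directly on the column-major raw buffers. As a self-contained sketch of what the ('N', 'T') combination computes (plain C++, not the BLAS or TMVA API; names are illustrative only):

// Minimal sketch: C(i,j) = sum_p A(i,p) * B(j,p), i.e. C = A * B^T, all column-major.
// A is m x k, B is n x k, C is m x n, matching the leading dimensions (&m, &n, &m) above.
void gemm_nt(int m, int n, int k, const double *A, const double *B, double *C)
{
   for (int j = 0; j < n; ++j)        // columns of C
      for (int i = 0; i < m; ++i) {   // rows of C
         double sum = 0.0;
         for (int p = 0; p < k; ++p)
            sum += A[i + p * m] * B[j + p * n];
         C[i + j * m] = sum;
      }
}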
template <typename AFloat>
void TCpu<AFloat>::AddRowWise(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &biases)
{
   int m = (int)output.GetNrows(), n = (int)output.GetNcols(), inc = 1;
   AFloat alpha = 1.0;
   AFloat *A = output.GetRawDataPointer();
   // Rank-one update A += ones * biases^T: add the bias vector to every row of output.
   ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, TCpuMatrix<AFloat>::GetOnePointer(), &inc,
                          biases.GetRawDataPointer(), &inc, A, &m);
}
template <typename AFloat>
void TCpu<AFloat>::Backward(TCpuMatrix<AFloat> &activationGradientsBackward, TCpuMatrix<AFloat> &weightGradients,
                            TCpuMatrix<AFloat> &biasGradients, TCpuMatrix<AFloat> &df,
                            const TCpuMatrix<AFloat> &activationGradients, const TCpuMatrix<AFloat> &weights,
                            const TCpuMatrix<AFloat> &activationsBackward)
{
   // Element-wise product of the incoming gradients with the activation derivatives (stored in df).
   Hadamard(df, activationGradients);
   // Gradients w.r.t. the activations of the previous layer.
   if (activationGradientsBackward.GetNoElements() > 0) Multiply(activationGradientsBackward, df, weights);
   // Gradients w.r.t. the weights.
   if (weightGradients.GetNoElements() > 0) TransposeMultiply(weightGradients, df, activationsBackward);
   // Gradients w.r.t. the biases.
   if (biasGradients.GetNoElements() > 0) SumColumns(biasGradients, df);
}
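For reference, these four calls correspond to the usual dense-layer backpropagation relations; a sketch in the batch-major layout used by this backend, with delta stored in df, W the weight matrix and a_prev the forward activations of the previous layer:

\delta = f'(z) \odot \frac{\partial L}{\partial a}, \qquad
\frac{\partial L}{\partial a_{\mathrm{prev}}} = \delta\, W, \qquad
\frac{\partial L}{\partial W} = \delta^{\mathsf{T}} a_{\mathrm{prev}}, \qquad
\frac{\partial L}{\partial b} = \sum_{\mathrm{batch}} \delta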
template <typename AFloat>
void TCpu<AFloat>::Im2col(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, size_t imgHeight, size_t imgWidth,
                          size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
                          size_t zeroPaddingHeight, size_t zeroPaddingWidth)
{
   // Bounds for the filter centres, including the zero padding.
   int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
   int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
   size_t currLocalView = 0;

   const int halfFltHeight = fltHeight / 2;
   const int halfFltWidth = fltWidth / 2;
   const int halfFltHeightM1 = (fltHeight - 1) / 2;
   const int halfFltWidthM1 = (fltWidth - 1) / 2;
   const int nRowsInput = B.GetNrows();
   const int nColsInput = B.GetNcols();
   const int nRowsOutput = A.GetNrows();
   const int nColsOutput = A.GetNcols();

   // Convolution centres: one local view per output pixel.
   for (int i = halfFltHeight - zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
      for (int j = halfFltWidth - zeroPaddingWidth; j <= imgWidthBound; j += strideCols) {
         size_t currLocalViewPixel = 0;

         R__ASSERT((int)currLocalView < nRowsOutput);

         // Loop over the depth slices and over the filter window centred at (i, j).
         for (int m = 0; m < nRowsInput; m++) {
            for (int k = i - halfFltHeight; k <= Int_t(i + halfFltHeightM1); k++) {
               int kstep = k * imgWidth;
               for (int l = j - halfFltWidth; l <= Int_t(j + halfFltWidthM1); l++) {

                  R__ASSERT((int)currLocalViewPixel < nColsOutput);

                  // Pixels that fall outside the image correspond to the zero padding.
                  if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
                     A(currLocalView, currLocalViewPixel++) = 0;
                  else
                     A(currLocalView, currLocalViewPixel++) = B(m, kstep + l);
               }
            }
         }
         currLocalView++;
      }
   }
}
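To make the layout concrete, here is a deliberately simplified im2col in plain C++ (single channel, stride 1, no padding; illustrative only, not the TMVA API): each receptive field becomes one row of the result, which is exactly the shape MultiplyTranspose consumes in the forward pass.

#include <cstddef>
#include <vector>

// One row per local view, one column per filter pixel.
std::vector<std::vector<float>> im2colSimple(const std::vector<float> &img, std::size_t imgH, std::size_t imgW,
                                             std::size_t fltH, std::size_t fltW)
{
   std::size_t outH = imgH - fltH + 1, outW = imgW - fltW + 1;
   std::vector<std::vector<float>> A(outH * outW, std::vector<float>(fltH * fltW));
   for (std::size_t i = 0; i < outH; ++i)
      for (std::size_t j = 0; j < outW; ++j)
         for (std::size_t r = 0; r < fltH; ++r)
            for (std::size_t c = 0; c < fltW; ++c)
               A[i * outW + j][r * fltW + c] = img[(i + r) * imgW + (j + c)];
   return A;
}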
template <typename AFloat>
void TCpu<AFloat>::Im2colIndices(std::vector<int> &V, const TCpuMatrix<AFloat> &B, size_t nLocalViews,
                                 size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth,
                                 size_t strideRows, size_t strideCols,
                                 size_t zeroPaddingHeight, size_t zeroPaddingWidth)
{
   int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
   int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
   size_t currLocalView = 0;

   const int halfFltHeight = fltHeight / 2;
   const int halfFltWidth = fltWidth / 2;
   const int halfFltHeightM1 = (fltHeight - 1) / 2;
   const int halfFltWidthM1 = (fltWidth - 1) / 2;
   const int nRowsInput = B.GetNrows();
   const int nColsInput = B.GetNcols();
   const size_t nSizeOutput = V.size();
   const int npixels = nRowsInput * fltHeight * fltWidth;

   // Same traversal as Im2col, but only the source indices are recorded; no data is copied.
   for (int i = halfFltHeight - zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
      for (int j = halfFltWidth - zeroPaddingWidth; j <= imgWidthBound; j += strideCols) {
         size_t currLocalViewPixel = 0;

         for (int m = 0; m < nRowsInput; m++) {
            for (int k = i - halfFltHeight; k <= Int_t(i + halfFltHeightM1); k++) {
               int kstep = k * imgWidth;
               for (int l = j - halfFltWidth; l <= Int_t(j + halfFltWidthM1); l++) {

                  R__ASSERT(currLocalView * npixels + currLocalViewPixel < nSizeOutput);

                  // Index -1 marks a zero-padding pixel; otherwise store the column-major
                  // offset of the source element of B.
                  if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
                     V[currLocalViewPixel * nLocalViews + currLocalView] = -1;
                  else
                     V[currLocalViewPixel * nLocalViews + currLocalView] = (kstep + l) * nRowsInput + m;

                  currLocalViewPixel++;
               }
            }
         }
         currLocalView++;
      }
   }
}
template <typename AFloat>
void TCpu<AFloat>::Im2colFast(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, const std::vector<int> &V)
{
   size_t n = V.size();
   R__ASSERT(n == A.GetNcols() * A.GetNrows());
   AFloat *a = A.GetRawDataPointer();
   const AFloat *b = B.GetRawDataPointer();

#ifdef DL_USE_MTE
   // Parallel execution: each worker gathers a contiguous chunk of the index vector.
   const size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(n);

   auto f = [&](UInt_t workerID) {
      for (size_t j = 0; j < nsteps; ++j) {
         size_t ii = workerID + j;
         if (ii >= n) break;
         int idx = V[ii];
         if (idx >= 0)
            a[ii] = b[idx];
         else
            a[ii] = 0;   // zero-padding pixel
      }
      return 0;
   };

   TCpuMatrix<AFloat>::GetThreadExecutor().Foreach(f, ROOT::TSeqI(0, n, nsteps));
#else
   // Serial execution.
   for (size_t ii = 0; ii < n; ++ii) {
      int idx = V[ii];
      if (idx >= 0)
         a[ii] = b[idx];
      else
         a[ii] = 0;   // zero-padding pixel
   }
#endif
}
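The index vector produced by Im2colIndices and consumed here is simply a gather map that can be reused for every event of a batch. A minimal plain-C++ sketch of that gather step (illustrative names, not the TMVA API):

#include <cstddef>
#include <vector>

// Apply a precomputed gather map; index -1 means a zero-padded position.
std::vector<float> gather(const std::vector<float> &src, const std::vector<int> &indices)
{
   std::vector<float> dst(indices.size());
   for (std::size_t i = 0; i < indices.size(); ++i)
      dst[i] = (indices[i] >= 0) ? src[indices[i]] : 0.0f;
   return dst;
}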
template <typename AFloat>
void TCpu<AFloat>::RotateWeights(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, size_t filterDepth,
                                 size_t filterHeight, size_t filterWidth, size_t numFilters)
{
   size_t jump = filterHeight * filterWidth;
   for (size_t j = 0; j < filterDepth; j++) {
      for (size_t k = 0; k < numFilters; k++) {
         for (size_t i = 0; i < jump; i++) {
            // Swap the filter and depth indices and reverse each filter (180-degree rotation).
            A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
         }
      }
   }
}
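A tiny illustration of the index arithmetic ((j + 1) * jump - 1) - i: within each depth slice the filter elements are read in reverse order, i.e. the filter is rotated by 180 degrees (plain C++, values made up for the example):

#include <cstddef>

// Reverse a row-major 2x2 filter: [1 2; 3 4] -> [4 3; 2 1] (a 180-degree rotation).
void rotate180_2x2(const double flt[4], double rot[4])
{
   for (std::size_t i = 0; i < 4; ++i) rot[i] = flt[3 - i];
}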
template <typename AFloat>
void TCpu<AFloat>::AddConvBiases(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &biases)
{
   int m = (int)output.GetNrows();
   int n = (int)output.GetNcols();
   int inc = 1;
   AFloat alpha = 1.0;

   AFloat *A = output.GetRawDataPointer();
   // Rank-one update: add the per-filter bias to every column (local view) of the output.
   ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, biases.GetRawDataPointer(), &inc,
                          TCpuMatrix<AFloat>::GetOnePointer(), &inc, A, &m);
}
template <typename AFloat>
size_t TCpu<AFloat>::calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
{
   size_t temp = imgDim - fltDim + 2 * padding;
   if (temp % stride || temp + stride <= 0) {
      Fatal("calculateDimension",
            "Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride) %zu, %zu, %zu, %zu",
            imgDim, fltDim, padding, stride);
   }
   return temp / stride + 1;
}
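A worked example of the formula above: a 32x32 input with a 5x5 filter, padding 2 and stride 1 gives (32 - 5 + 2*2) / 1 + 1 = 32, so the spatial size is preserved ("same" padding); a combination with a non-zero remainder triggers the Fatal call instead.

#include <cstddef>

constexpr std::size_t outDim = (32 - 5 + 2 * 2) / 1 + 1;   // = 32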
template <typename AFloat>
void TCpu<AFloat>::ConvLayerForward(std::vector<TCpuMatrix<AFloat>> &output, std::vector<TCpuMatrix<AFloat>> &derivatives,
                                    const std::vector<TCpuMatrix<AFloat>> &input, const TCpuMatrix<AFloat> &weights,
                                    const TCpuMatrix<AFloat> &biases, const DNN::CNN::TConvParams &params,
                                    EActivationFunction activFunc, std::vector<TCpuMatrix<AFloat>> & /* not used */)
{
   size_t height = calculateDimension(params.inputHeight, params.filterHeight, params.paddingHeight, params.strideRows);
   size_t width = calculateDimension(params.inputWidth, params.filterWidth, params.paddingWidth, params.strideCols);
   size_t nLocalViews = height * width;
   size_t nLocalViewPixels = params.inputDepth * params.filterHeight * params.filterWidth;

   // The im2col gather indices are identical for every event, so they are computed only once.
   std::vector<int> forwardIndices(nLocalViews * nLocalViewPixels);
   Im2colIndices(forwardIndices, input[0], nLocalViews, params.inputHeight, params.inputWidth, params.filterHeight,
                 params.filterWidth, params.strideRows, params.strideCols, params.paddingHeight, params.paddingWidth);

   // Loop over the events of the batch: gather, convolve via GEMM, add biases, apply the activation.
   for (size_t i = 0; i < input.size(); i++) {
      TCpuMatrix<AFloat> inputTr(nLocalViews, nLocalViewPixels);
      Im2colFast(inputTr, input[i], forwardIndices);
      MultiplyTranspose(output[i], weights, inputTr);   // output = weights * inputTr^T
      AddConvBiases(output[i], biases);
      evaluateDerivative<TCpu<AFloat>>(derivatives[i], activFunc, output[i]);
      evaluate<TCpu<AFloat>>(output[i], activFunc);
   }
}
template <typename AFloat>
void TCpu<AFloat>::ConvLayerBackward(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
                                     TCpuMatrix<AFloat> &weightGradients, TCpuMatrix<AFloat> &biasGradients,
                                     std::vector<TCpuMatrix<AFloat>> &df,
                                     const std::vector<TCpuMatrix<AFloat>> &activationGradients,
                                     const TCpuMatrix<AFloat> &weights,
                                     const std::vector<TCpuMatrix<AFloat>> &activationsBackward, size_t batchSize,
                                     size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width,
                                     size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
{
   // Multiply the activation gradients with the activation derivatives, stored in df.
   for (size_t i = 0; i < batchSize; i++) {
      Hadamard(df[i], activationGradients[i]);
   }

   // Gradients w.r.t. the activations of the previous layer.
   CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth, depth,
                                    height, width, filterDepth, filterHeight, filterWidth);

   // Gradients w.r.t. the weights.
   CalculateConvWeightGradients(weightGradients, df, activationsBackward, batchSize, inputHeight, inputWidth, depth,
                                height, width, filterDepth, filterHeight, filterWidth, nLocalViews);

   // Gradients w.r.t. the biases.
   CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);
}
template <typename AFloat>
void TCpu<AFloat>::CalculateConvActivationGradients(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
                                                    const std::vector<TCpuMatrix<AFloat>> &df,
                                                    const TCpuMatrix<AFloat> &weights, size_t batchSize,
                                                    size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
                                                    size_t width, size_t filterDepth, size_t filterHeight,
                                                    size_t filterWidth)
{
   if (activationGradientsBackward.size() == 0) return;

   for (size_t i = 0; i < activationGradientsBackward.size(); i++) {
      activationGradientsBackward[i].Zero();
   }

   // Transform the weights: rotate each filter by 180 degrees and swap the filter/depth axes.
   TCpuMatrix<AFloat> rotWeights(filterDepth, depth * filterHeight * filterWidth);
   RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());

   // Zero paddings for a "full" convolution of df with the rotated weights.
   size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
   size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));

   // Dimensions of the temporary convolution: one local view per input pixel, unit stride.
   size_t tempNLocalViews = inputHeight * inputWidth;
   size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
   size_t tempStrideRows = 1;
   size_t tempStrideCols = 1;

   std::vector<int> vIndices(tempNLocalViews * tempNLocalViewPixels);
   Im2colIndices(vIndices, df[0], tempNLocalViews, height, width, filterHeight, filterWidth, tempStrideRows,
                 tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);

   R__ASSERT(batchSize == activationGradientsBackward.size());

   for (size_t i = 0; i < batchSize; i++) {
      TCpuMatrix<AFloat> dfTr(tempNLocalViews, tempNLocalViewPixels);
      Im2colFast(dfTr, df[i], vIndices);

      // Backward convolution of df with the rotated weights.
      MultiplyTranspose(activationGradientsBackward[i], rotWeights, dfTr);
   }
}
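What the rotated weights and the enlarged zero padding achieve is the standard identity for the gradient of a convolution with respect to its input, sketched here for one channel and unit stride: if the forward pass computes y_{i,j} = \sum_{r,c} W_{r,c}\, x_{i+r,\,j+c}, then

\frac{\partial L}{\partial x_{a,b}} \;=\; \sum_{r,c} W_{r,c}\, \frac{\partial L}{\partial y_{a-r,\,b-c}},

i.e. a full convolution of the output gradient with the 180-degree rotated filter, which is exactly the Im2colFast plus MultiplyTranspose combination above.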
template <typename AFloat>
void TCpu<AFloat>::CalculateConvWeightGradients(TCpuMatrix<AFloat> &weightGradients,
                                                const std::vector<TCpuMatrix<AFloat>> &df,
                                                const std::vector<TCpuMatrix<AFloat>> &activationsBackward,
                                                size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
                                                size_t height, size_t width, size_t filterDepth, size_t filterHeight,
                                                size_t filterWidth, size_t nLocalViews)
{
   // Re-initialize the weight gradients to zero.
   weightGradients.Zero();

   const size_t filterSize = filterHeight * filterWidth;
   const size_t nLocalViewPixels = filterDepth * filterHeight * filterWidth;
   R__ASSERT(weightGradients.GetNcols() == filterDepth * filterHeight * filterWidth);

   const size_t tempStrideRows = 1;
   const size_t tempStrideCols = 1;

   // Zero paddings derived from the input and output sizes (stride 1 is assumed here).
   const size_t tempZeroPaddingHeight = (height - inputHeight + filterHeight - 1) / 2;
   const size_t tempZeroPaddingWidth = (width - inputWidth + filterWidth - 1) / 2;

   // The im2col indices of the backward activations are the same for all events.
   std::vector<int> vIndices(nLocalViews * nLocalViewPixels);
   Im2colIndices(vIndices, activationsBackward[0], nLocalViews, inputHeight, inputWidth, filterHeight, filterWidth,
                 tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);

   // One partial result per event of the batch.
   std::vector<TCpuMatrix<AFloat>> vres;
   for (size_t i = 0; i < batchSize; i++) {
      vres.emplace_back(depth, nLocalViewPixels);
   }

   // The weight gradient is a convolution of the backward activations with df as the kernel.
   auto fmap = [&](int i) {
      TCpuMatrix<AFloat> xTr(nLocalViews, nLocalViewPixels);
      Im2colFast(xTr, activationsBackward[i], vIndices);
      Multiply(vres[i], df[i], xTr);
   };

   for (size_t i = 0; i < batchSize; i++) {
      fmap(i);
   }

   // Accumulate the per-event contributions into the weight gradients.
   for (size_t i = 0; i < batchSize; i++) {
      for (size_t j = 0; j < depth; j++) {
         for (size_t k = 0; k < filterDepth; k++) {
            size_t kOffset = k * filterSize;
            for (size_t l = 0; l < filterSize; l++) {
               weightGradients(j, kOffset + l) += vres[i](j, kOffset + l);
            }
         }
      }
   }
}
template <typename AFloat>
void TCpu<AFloat>::CalculateConvBiasGradients(TCpuMatrix<AFloat> &biasGradients, const std::vector<TCpuMatrix<AFloat>> &df,
                                              size_t batchSize, size_t depth, size_t nLocalViews)
{
   biasGradients.Zero();
   for (size_t i = 0; i < depth; i++) {
      AFloat sum = 0;
      // Sum the output gradients over all local views and all events of the batch.
      for (size_t j = 0; j < nLocalViews; j++) {
         for (size_t k = 0; k < batchSize; k++) {
            sum += df[k](i, j);
         }
      }
      biasGradients(i, 0) = sum;
   }
}
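Written out, the loop computes, for each filter i,

\frac{\partial L}{\partial b_i} \;=\; \sum_{k=1}^{\mathrm{batchSize}} \sum_{j=1}^{\mathrm{nLocalViews}} (\mathrm{df}_k)_{i,j},

i.e. the output gradient summed over every spatial position and every event of the batch.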
template <typename AFloat>
void TCpu<AFloat>::Downsample(TCpuMatrix<AFloat> &A, TCpuMatrix<AFloat> &B, const TCpuMatrix<AFloat> &C,
                              size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth,
                              size_t strideRows, size_t strideCols)
{
   // Upper bounds of the pooling-window centres.
   int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
   int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
   size_t currLocalView = 0;

   // Centres of the pooling windows.
   for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
      for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
         // Loop over the depth slices of the input.
         for (int m = 0; m < (Int_t)C.GetNrows(); m++) {
            AFloat value = -std::numeric_limits<AFloat>::max();

            // Find the maximum within the pooling window and record its position in B.
            for (int k = i - fltHeight / 2; k <= Int_t(i + (fltHeight - 1) / 2); k++) {
               for (int l = j - fltWidth / 2; l <= Int_t(j + (fltWidth - 1) / 2); l++) {
                  if (C(m, k * imgWidth + l) > value) {
                     value = C(m, k * imgWidth + l);
                     B(m, currLocalView) = k * imgWidth + l;
                  }
               }
            }
            A(m, currLocalView) = value;
         }
         currLocalView++;
      }
   }
}
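A compact single-channel illustration of the same bookkeeping (plain C++, not the TMVA API): 2x2 max pooling with stride 2 on a 4x4 image, keeping the flat index of each winner in the same way the matrix B is filled above.

#include <limits>

void maxPool2x2(const float img[16], float out[4], int winner[4])
{
   for (int bi = 0; bi < 2; ++bi)
      for (int bj = 0; bj < 2; ++bj) {
         float best = -std::numeric_limits<float>::max();
         int bestIdx = -1;
         for (int k = 0; k < 2; ++k)
            for (int l = 0; l < 2; ++l) {
               int idx = (2 * bi + k) * 4 + (2 * bj + l);   // flat index into the 4x4 image
               if (img[idx] > best) { best = img[idx]; bestIdx = idx; }
            }
         out[bi * 2 + bj] = best;      // pooled value
         winner[bi * 2 + bj] = bestIdx;   // position of the winner
      }
}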
template <typename AFloat>
void TCpu<AFloat>::MaxPoolLayerBackward(TCpuMatrix<AFloat> &activationGradientsBackward,
                                        const TCpuMatrix<AFloat> &activationGradients,
                                        const TCpuMatrix<AFloat> &indexMatrix, size_t imgHeight, size_t imgWidth,
                                        size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
                                        size_t nLocalViews)
{
   size_t depth = activationGradientsBackward.GetNrows();

   for (size_t j = 0; j < depth; j++) {
      // Reset the gradients of the previous layer.
      for (size_t t = 0; t < (size_t)activationGradientsBackward.GetNcols(); t++) {
         activationGradientsBackward(j, t) = 0;
      }

      // Route each gradient back to the pixel that won the max pooling.
      for (size_t k = 0; k < nLocalViews; k++) {
         AFloat grad = activationGradients(j, k);
         size_t winningIdx = indexMatrix(j, k);
         activationGradientsBackward(j, winningIdx) += grad;
      }
   }
}
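Continuing the small example above, the backward step simply scatters each pooled gradient to its recorded winner; every other input pixel receives zero gradient (illustrative plain C++):

void maxPoolBackward2x2(const float grad[4], const int winner[4], float gradBack[16])
{
   for (int i = 0; i < 16; ++i) gradBack[i] = 0.0f;          // reset the input gradients
   for (int k = 0; k < 4; ++k) gradBack[winner[k]] += grad[k];   // route to the winning pixels
}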
template <typename AFloat>
void TCpu<AFloat>::Reshape(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B)
{
   size_t nColsA = A.GetNcols();
   size_t nColsB = B.GetNcols();

   for (size_t i = 0; i < A.GetNrows(); i++) {
      for (size_t j = 0; j < A.GetNcols(); j++) {
         // Map the row-major linear index of A onto the row-major layout of B.
         size_t nElem = i * nColsA + j;
         A(i, j) = B(nElem / nColsB, nElem % nColsB);
      }
   }
}
template <typename AFloat>
void TCpu<AFloat>::Flatten(TCpuMatrix<AFloat> &A, const std::vector<TCpuMatrix<AFloat>> &B, size_t size, size_t nRows,
                           size_t nCols)
{
   for (size_t i = 0; i < (size_t)size; i++) {
      for (size_t j = 0; j < (size_t)nRows; j++) {
         for (size_t k = 0; k < (size_t)nCols; k++) {
            // Each matrix B[i] is stretched into row i of A.
            A(i, j * nCols + k) = B[i](j, k);
         }
      }
   }
}
template <typename AFloat>
void TCpu<AFloat>::Deflatten(std::vector<TCpuMatrix<AFloat>> &A, const TCpuMatrix<AFloat> &B, size_t size,
                             size_t nRows, size_t nCols)
{
   for (size_t i = 0; i < (size_t)size; i++) {
      for (size_t j = 0; j < (size_t)nRows; j++) {
         for (size_t k = 0; k < (size_t)nCols; k++) {
            // Row i of B is unpacked back into the matrix A[i].
            A[i](j, k) = B(i, j * nCols + k);
         }
      }
   }
}
template <typename AReal>
void TCpu<AReal>::Rearrange(std::vector<TCpuMatrix<AReal>> &out, const std::vector<TCpuMatrix<AReal>> &in)
{
   // Rearrange the T x B x D input tensor into the B x T x D output tensor.
   size_t B = out.size();
   size_t T = out[0].GetNrows();
   size_t D = out[0].GetNcols();
   if ((T != in.size()) || (B != in[0].GetNrows()) || (D != in[0].GetNcols())) {
      std::cout << "Incompatible Dimensions\n"
                << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"
                << D << "\n";
      return;
   }
   for (size_t i = 0; i < B; ++i) {
      for (size_t j = 0; j < T; ++j) {
         for (size_t k = 0; k < D; ++k) {
            out[i](j, k) = in[j](i, k);
         }
      }
   }
}
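A plain-C++ sketch of the same index swap on nested vectors (illustrative only, not the TMVA types): the time-major input in[T][B][D] is copied into the batch-major output out[B][T][D].

#include <cstddef>
#include <vector>

void rearrange(std::vector<std::vector<std::vector<float>>> &out,
               const std::vector<std::vector<std::vector<float>>> &in)
{
   const std::size_t T = in.size(), B = in[0].size(), D = in[0][0].size();
   out.assign(B, std::vector<std::vector<float>>(T, std::vector<float>(D)));
   for (std::size_t i = 0; i < B; ++i)
      for (std::size_t j = 0; j < T; ++j)
         for (std::size_t k = 0; k < D; ++k)
            out[i][j][k] = in[j][i][k];   // swap the batch and time indices
}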
void Error(const char *location, const char *msgfmt,...)
void Fatal(const char *location, const char *msgfmt,...)
A pseudo container class which is a generator of indices.
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
void Zero()
Clear the content of the matrix and initialize all elements to zero.
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
static size_t GetNWorkItems(size_t nelements)
static ROOT::TThreadExecutor & GetThreadExecutor()
static void InitializeOneVector(size_t n)
size_t GetNoElements() const
static void CalculateConvActivationGradients(std::vector< TCpuMatrix< Scalar_t > > &activationGradientsBackward, const std::vector< TCpuMatrix< Scalar_t > > &df, const TCpuMatrix< Scalar_t > &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void Im2col(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B into local-view format, suitable for convolution, and store it in the matrix A.
static void AddRowWise(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
static void Im2colFast(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, const std::vector< int > &V)
static void Downsample(TCpuMatrix< AReal > &A, TCpuMatrix< AReal > &B, const TCpuMatrix< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void CalculateConvWeightGradients(TCpuMatrix< Scalar_t > &weightGradients, const std::vector< TCpuMatrix< Scalar_t > > &df, const std::vector< TCpuMatrix< Scalar_t > > &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static void MaxPoolLayerBackward(TCpuMatrix< AReal > &activationGradientsBackward, const TCpuMatrix< AReal > &activationGradients, const TCpuMatrix< AReal > &indexMatrix, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void Flatten(TCpuMatrix< AReal > &A, const std::vector< TCpuMatrix< AReal > > &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B such that each matrix is stretched into one row, resulting in the matrix A.
static void Reshape(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
static void ConvLayerForward(std::vector< TCpuMatrix< Scalar_t > > &output, std::vector< TCpuMatrix< Scalar_t > > &derivatives, const std::vector< TCpuMatrix< Scalar_t > > &input, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &biases, const DNN::CNN::TConvParams ¶ms, EActivationFunction activFunc, std::vector< TCpuMatrix< Scalar_t > > &)
Forward propagation in the Convolutional layer.
static void Rearrange(std::vector< TCpuMatrix< AReal > > &out, const std::vector< TCpuMatrix< AReal > > &in)
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
static void RotateWeights(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void MultiplyTranspose(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void CalculateConvBiasGradients(TCpuMatrix< Scalar_t > &biasGradients, const std::vector< TCpuMatrix< Scalar_t > > &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void ConvLayerBackward(std::vector< TCpuMatrix< Scalar_t > > &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, std::vector< TCpuMatrix< Scalar_t > > &df, const std::vector< TCpuMatrix< Scalar_t > > &activationGradients, const TCpuMatrix< Scalar_t > &weights, const std::vector< TCpuMatrix< Scalar_t > > &activationBackward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void Backward(TCpuMatrix< Scalar_t > &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, TCpuMatrix< Scalar_t > &df, const TCpuMatrix< Scalar_t > &activationGradients, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
static void AddConvBiases(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the biases in the Convolutional Layer.
static void Im2colIndices(std::vector< int > &V, const TCpuMatrix< AReal > &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Deflatten(std::vector< TCpuMatrix< AReal > > &A, const TCpuMatrix< AReal > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B into a matrix and stores it in the tensor A.
void Ger(const int *m, const int *n, const Real_t *alpha, const Real_t *x, const int *incx, const Real_t *y, const int *incy, Real_t *A, const int *lda)
Add the outer product of x and y to the matrix A.
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
EActivationFunction
Enum that represents layer activation functions.
size_t strideRows
The number of row pixels by which the filter is slid at each step.
size_t filterHeight
The height of the filter.
size_t inputHeight
The height of the previous layer or input.
size_t paddingWidth
The number of zero layers added to the left and right of the input.
size_t filterWidth
The width of the filter.
size_t paddingHeight
The number of zero layers added at the top and bottom of the input.
size_t inputWidth
The width of the previous layer or input.
size_t inputDepth
The depth of the previous layer or input.
size_t strideCols
The number of column pixels by which the filter is slid at each step.