template <typename AReal>
void TReference<AReal>::MultiplyTranspose(TMatrixT<AReal> &output, const TMatrixT<AReal> &input,
                                          const TMatrixT<AReal> &weights)
{
   output.MultT(input, weights);
}

template <typename AReal>
void TReference<AReal>::AddRowWise(TMatrixT<AReal> &output, const TMatrixT<AReal> &biases)
{
   for (size_t i = 0; i < (size_t)output.GetNrows(); i++) {
      for (size_t j = 0; j < (size_t)output.GetNcols(); j++) {
         output(i, j) += biases(j, 0);
      }
   }
}
template <typename AReal>
void TReference<AReal>::Backward(TMatrixT<AReal> &activation_gradients_backward, TMatrixT<AReal> &weight_gradients,
                                 TMatrixT<AReal> &bias_gradients, TMatrixT<AReal> &df,
                                 const TMatrixT<AReal> &activation_gradients, const TMatrixT<AReal> &weights,
                                 const TMatrixT<AReal> &activations_backward)
{
   // Compute the element-wise product of df and the incoming activation gradients.
   for (size_t i = 0; i < (size_t)df.GetNrows(); i++) {
      for (size_t j = 0; j < (size_t)df.GetNcols(); j++) {
         df(i, j) *= activation_gradients(i, j);
      }
   }

   // Activation gradients of the previous layer.
   activation_gradients_backward.Mult(df, weights);

   // Weight gradients.
   weight_gradients.TMult(df, activations_backward);

   // Bias gradients: sum df over the batch (rows), one entry per output unit (column).
   for (size_t j = 0; j < (size_t)df.GetNcols(); j++) {
      AReal sum = 0;
      for (size_t i = 0; i < (size_t)df.GetNrows(); i++) {
         sum += df(i, j);
      }
      bias_gradients(j, 0) = sum;
   }
}
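As a shape illustration (the sizes below are assumptions chosen for the example, and the snippet assumes the TReference declarations from TMVA::DNN are in scope), a fully connected layer with 3 inputs and 4 outputs on a batch of 2 would be driven as follows:

   // Assumed shapes: batch = 2, nInputs = 3, nOutputs = 4; weights stored as (nOutputs x nInputs).
   TMatrixT<double> actGradBackward(2, 3), weightGrad(4, 3), biasGrad(4, 1);
   TMatrixT<double> df(2, 4), actGrad(2, 4), weights(4, 3), actBackward(2, 3);
   // df holds the activation derivatives of this layer and is overwritten in place.
   TReference<double>::Backward(actGradBackward, weightGrad, biasGrad, df, actGrad, weights, actBackward);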
template <typename AReal>
void TReference<AReal>::ScaleAdd(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, AReal beta)
{
   for (size_t i = 0; i < (size_t)A.GetNrows(); i++) {
      for (size_t j = 0; j < (size_t)A.GetNcols(); j++) {
         A(i, j) += beta * B(i, j);
      }
   }
}

template <typename AReal>
void TReference<AReal>::Copy(TMatrixT<AReal> &A, const TMatrixT<AReal> &B)
{
   A = B;
}

template <typename AReal>
void TReference<AReal>::ScaleAdd(std::vector<TMatrixT<AReal>> &A, const std::vector<TMatrixT<AReal>> &B, AReal beta)
{
   for (size_t i = 0; i < A.size(); ++i) {
      ScaleAdd(A[i], B[i], beta);
   }
}

template <typename AReal>
void TReference<AReal>::Copy(std::vector<TMatrixT<AReal>> &A, const std::vector<TMatrixT<AReal>> &B)
{
   for (size_t i = 0; i < A.size(); ++i) {
      Copy(A[i], B[i]);
   }
}
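A typical use of the ScaleAdd overloads above is a plain gradient-descent update; a minimal sketch (the learning rate and shapes are assumptions for the example):

   TMatrixT<double> W(4, 3), dW(4, 3);
   // W += (-learningRate) * dW
   TReference<double>::ScaleAdd(W, dW, -0.01);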
template <typename AReal>
void TReference<AReal>::Im2col(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, size_t imgHeight, size_t imgWidth,
                               size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
                               size_t zeroPaddingHeight, size_t zeroPaddingWidth)
{
   // Image boundaries, including the zero padding.
   int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
   int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
   size_t currLocalView = 0;

   // Centre of each local view.
   for (int i = -1 * int(zeroPaddingHeight) + fltHeight / 2; i <= imgHeightBound; i += strideRows) {
      for (int j = -1 * int(zeroPaddingWidth) + fltWidth / 2; j <= imgWidthBound; j += strideCols) {
         size_t currLocalViewPixel = 0;

         // Pixels within the local view, over all channels of B.
         for (int m = 0; m < B.GetNrows(); m++) {
            for (Int_t k = i - Int_t(fltHeight) / 2; k <= i + (Int_t(fltHeight) - 1) / 2; k++) {
               for (Int_t l = j - Int_t(fltWidth) / 2; l <= j + (Int_t(fltWidth) - 1) / 2; l++) {
                  // Zero-fill pixels that fall outside the image boundaries.
                  if (k < 0 || k >= Int_t(imgHeight) || l < 0 || l >= Int_t(imgWidth))
                     A(currLocalView, currLocalViewPixel++) = 0;
                  else
                     A(currLocalView, currLocalViewPixel++) = B(m, k * imgWidth + l);
               }
            }
         }
         currLocalView++;
      }
   }
}
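A shape sketch may help here (all sizes are assumptions for the example): a 3-channel 5x5 image with 3x3 filters, stride 1 and no zero padding yields one row of A per local view and one column per pixel of the receptive field:

   const size_t depth = 3, imgH = 5, imgW = 5, fltH = 3, fltW = 3;
   const size_t nLocalViews = ((imgH - fltH) / 1 + 1) * ((imgW - fltW) / 1 + 1); // 3 * 3 = 9
   const size_t nLocalViewPixels = depth * fltH * fltW;                          // 3 * 3 * 3 = 27
   TMatrixT<double> B(depth, imgH * imgW);            // input image, one row per channel
   TMatrixT<double> A(nLocalViews, nLocalViewPixels); // local-view (im2col) matrix
   TReference<double>::Im2col(A, B, imgH, imgW, fltH, fltW, 1, 1, 0, 0);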
template <typename AReal>
void TReference<AReal>::RotateWeights(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, size_t filterDepth,
                                      size_t filterHeight, size_t filterWidth, size_t numFilters)
{
   size_t jump = filterHeight * filterWidth;
   for (size_t j = 0; j < filterDepth; j++) {
      for (size_t k = 0; k < numFilters; k++) {
         for (size_t i = 0; i < jump; i++) {
            A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
         }
      }
   }
}
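As a concrete check of the index arithmetic (the filter size is assumed for the example), with filterHeight = filterWidth = 2 the jump is 4 and each filter slice of B is written into A back to front, i.e. rotated by 180 degrees:

   // jump = 4, so for slice (j, k):
   // A(j, 4*k + 0) = B(k, 4*j + 3)    A(j, 4*k + 1) = B(k, 4*j + 2)
   // A(j, 4*k + 2) = B(k, 4*j + 1)    A(j, 4*k + 3) = B(k, 4*j + 0)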
template <typename AReal>
void TReference<AReal>::AddConvBiases(TMatrixT<AReal> &output, const TMatrixT<AReal> &biases)
{
   for (size_t i = 0; i < (size_t)output.GetNrows(); i++) {
      for (size_t j = 0; j < (size_t)output.GetNcols(); j++) {
         output(i, j) += biases(i, 0);
      }
   }
}

#ifdef HAVE_CNN_REFERENCE
template <typename AReal>
void TReference<AReal>::ConvLayerBackward(std::vector<TMatrixT<AReal>> &activation_gradients_backward,
                                          TMatrixT<AReal> &weight_gradients, TMatrixT<AReal> &bias_gradients,
                                          std::vector<TMatrixT<AReal>> &df,
                                          const std::vector<TMatrixT<AReal>> &activation_gradients,
                                          const TMatrixT<AReal> &weights,
                                          const std::vector<TMatrixT<AReal>> &activations_backward, size_t batchSize,
                                          size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
                                          size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth,
                                          size_t nLocalViews)
{
   // Update the derivatives with the gradients coming from the next layer.
   size_t m = activation_gradients[0].GetNrows();
   size_t n = activation_gradients[0].GetNcols();

   for (size_t i = 0; i < batchSize; i++) {
      for (size_t j = 0; j < m; j++) {
         for (size_t k = 0; k < n; k++) {
            df[i](j, k) *= activation_gradients[i](j, k);
         }
      }
   }

   // Calculate the activation gradients of the previous layer.
   CalculateConvActivationGradients(activation_gradients_backward, df, weights, batchSize, inputHeight, inputWidth,
                                    depth, height, width, filterDepth, filterHeight, filterWidth);

   // Calculate the weight gradients.
   CalculateConvWeightGradients(weight_gradients, df, activations_backward, batchSize, inputHeight, inputWidth, depth,
                                height, width, filterDepth, filterHeight, filterWidth, nLocalViews);

   // Calculate the bias gradients.
   CalculateConvBiasGradients(bias_gradients, df, batchSize, depth, nLocalViews);
}
template <typename AReal>
void TReference<AReal>::CalculateConvActivationGradients(std::vector<TMatrixT<AReal>> &activation_gradients_backward,
                                                         std::vector<TMatrixT<AReal>> &df,
                                                         const TMatrixT<AReal> &weights, size_t batchSize,
                                                         size_t inputHeight, size_t inputWidth, size_t depth,
                                                         size_t height, size_t width, size_t filterDepth,
                                                         size_t filterHeight, size_t filterWidth)
{
   if (activation_gradients_backward.size() == 0) return;

   // Transform the weights: a 180-degree rotation of every filter slice.
   TMatrixT<AReal> rotWeights(filterDepth, depth * filterHeight * filterWidth);
   RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());

   // Calculate the zero paddings needed to recover the input dimensions (stride 1).
   size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
   size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));

   // Number of local views and number of pixels in each view.
   size_t tempNLocalViews = inputHeight * inputWidth;
   size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;

   size_t tempStrideRows = 1;
   size_t tempStrideCols = 1;

   // An entire convolution for every batch element.
   for (size_t i = 0; i < batchSize; i++) {
      TMatrixT<AReal> dfTr(tempNLocalViews, tempNLocalViewPixels);
      Im2col(dfTr, df[i], inputHeight, inputWidth, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
             tempZeroPaddingHeight, tempZeroPaddingWidth);

      activation_gradients_backward[i].MultT(rotWeights, dfTr);
   }
}
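A worked instance of the padding formula (the layer sizes are assumed for the example): with inputHeight = 28, height = 24 and filterHeight = 5 at stride 1, tempZeroPaddingHeight = (28 - 24 + 5 - 1) / 2 = 4 = filterHeight - 1, i.e. the padding of a full convolution, and 24 + 2 * 4 - 5 + 1 = 28 recovers the input height.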
template <typename AReal>
void TReference<AReal>::CalculateConvWeightGradients(TMatrixT<AReal> &weight_gradients,
                                                     std::vector<TMatrixT<AReal>> &df,
                                                     const std::vector<TMatrixT<AReal>> &activations_backward,
                                                     size_t batchSize, size_t inputHeight, size_t inputWidth,
                                                     size_t depth, size_t height, size_t width, size_t filterDepth,
                                                     size_t filterHeight, size_t filterWidth, size_t nLocalViews)
{
   // Reinitialize the weight gradients to zero.
   for (Int_t i = 0; i < weight_gradients.GetNrows(); i++) {
      for (Int_t j = 0; j < weight_gradients.GetNcols(); j++) {
         weight_gradients(i, j) = 0;
      }
   }

   for (size_t i = 0; i < batchSize; i++) {
      // Calculate the zero paddings (stride 1 is assumed).
      size_t tempZeroPaddingHeight = (filterHeight - height + inputHeight - 1) / 2;
      size_t tempZeroPaddingWidth = (filterWidth - width + inputWidth - 1) / 2;

      size_t tempNLocalViews = filterHeight * filterWidth;
      size_t tempNLocalViewPixels = inputHeight * inputWidth;

      size_t tempStrideRows = 1;
      size_t tempStrideCols = 1;

      for (size_t j = 0; j < depth; j++) {

         // Extract the row of df corresponding to output channel j.
         TMatrixT<AReal> rowDelta(1, nLocalViews);
         for (size_t k = 0; k < nLocalViews; k++) {
            rowDelta(0, k) = df[i](j, k);
         }

         // Convolution of the previous activations with the extracted deltas.
         TMatrixT<AReal> res(filterDepth, filterHeight * filterWidth);
         TMatrixT<AReal> rowDeltaTr(tempNLocalViews, tempNLocalViewPixels);
         Im2col(rowDeltaTr, rowDelta, height, width, inputHeight, inputWidth, tempStrideRows, tempStrideCols,
                tempZeroPaddingHeight, tempZeroPaddingWidth);

         res.MultT(activations_backward[i], rowDeltaTr);

         for (size_t k = 0; k < filterDepth; k++) {
            for (size_t l = 0; l < filterHeight * filterWidth; l++) {
               weight_gradients(j, k * (filterHeight * filterWidth) + l) += res(k, (tempNLocalViews - 1) - l);
            }
         }
      }
   }

   (void)weight_gradients; // silence possible unused-parameter warnings
   (void)activations_backward;
}
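Judging from the loop indices above, weight_gradients is filled with one row per output-channel index j (depth rows), each row holding filterDepth * filterHeight * filterWidth accumulated entries; for example (sizes assumed), depth = 8, filterDepth = 3 and 5x5 filters give an 8 x 75 gradient matrix.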
template <typename AReal>
void TReference<AReal>::CalculateConvBiasGradients(TMatrixT<AReal> &bias_gradients, std::vector<TMatrixT<AReal>> &df,
                                                   size_t batchSize, size_t depth, size_t nLocalViews)
{
   for (size_t i = 0; i < depth; i++) {
      AReal sum = 0;
      for (size_t j = 0; j < nLocalViews; j++) {
         for (size_t k = 0; k < batchSize; k++) {
            sum += df[k](i, j);
         }
      }
      bias_gradients(i, 0) = sum;
   }
}
template <typename AReal>
void TReference<AReal>::Downsample(TMatrixT<AReal> &A, TMatrixT<AReal> &B, const TMatrixT<AReal> &C, size_t imgHeight,
                                   size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,
                                   size_t strideCols)
{
   // Image boundaries.
   int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
   int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
   size_t currLocalView = 0;

   // Centre of each local view.
   for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
      for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
         // Within each local view, take the maximum over every channel.
         for (int m = 0; m < C.GetNrows(); m++) {
            AReal value = -std::numeric_limits<AReal>::max();

            for (int k = i - Int_t(fltHeight) / 2; k <= i + (Int_t(fltHeight) - 1) / 2; k++) {
               for (int l = j - Int_t(fltWidth) / 2; l <= j + (Int_t(fltWidth) - 1) / 2; l++) {
                  if (C(m, k * imgWidth + l) > value) {
                     value = C(m, k * imgWidth + l);
                     B(m, currLocalView) = k * imgWidth + l;
                  }
               }
            }
            A(m, currLocalView) = value;
         }
         currLocalView++;
      }
   }
}
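A minimal usage sketch (all sizes are assumptions for the example): 2x2 max pooling with stride 2 on a single-channel 4x4 image produces 4 local views; A receives the pooled values and B the winning indices that MaxPoolLayerBackward uses later:

   TMatrixT<double> C(1, 16); // input: one row per channel, 4x4 pixels flattened row-wise
   TMatrixT<double> A(1, 4);  // pooled output, one column per local view
   TMatrixT<double> B(1, 4);  // arg-max index into the input for each local view
   TReference<double>::Downsample(A, B, C, 4, 4, 2, 2, 2, 2);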
template <typename AReal>
void TReference<AReal>::MaxPoolLayerBackward(TMatrixT<AReal> &activationGradientsBackward,
                                             const TMatrixT<AReal> &activationGradients,
                                             const TMatrixT<AReal> &indexMatrix,
                                             size_t /* imgHeight */, size_t /* imgWidth */,
                                             size_t /* fltHeight */, size_t /* fltWidth */,
                                             size_t /* strideRows */, size_t /* strideCols */,
                                             size_t nLocalViews)
{
   size_t depth = activationGradientsBackward.GetNrows();

   for (size_t j = 0; j < depth; j++) {
      // Reset the gradients of the previous layer.
      for (size_t t = 0; t < (size_t)activationGradientsBackward.GetNcols(); t++) {
         activationGradientsBackward[j][t] = 0;
      }

      // Add each gradient to the winning (arg-max) pixel of its local view.
      for (size_t k = 0; k < nLocalViews; k++) {
         AReal grad = activationGradients[j][k];
         size_t winningIdx = indexMatrix[j][k];
         activationGradientsBackward[j][winningIdx] += grad;
      }
   }
}
template <typename AReal>
void TReference<AReal>::Reshape(TMatrixT<AReal> &A, const TMatrixT<AReal> &B)
{
   size_t nColsA = A.GetNcols();
   size_t nColsB = B.GetNcols();

   for (size_t i = 0; i < (size_t)A.GetNrows(); i++) {
      for (size_t j = 0; j < (size_t)A.GetNcols(); j++) {
         auto nElem = i * nColsA + j;
         A(i, j) = B(nElem / nColsB, nElem % nColsB);
      }
   }
}

template <typename AReal>
void TReference<AReal>::Flatten(TMatrixT<AReal> &A, const std::vector<TMatrixT<AReal>> &B, size_t size, size_t nRows,
                                size_t nCols)
{
   for (size_t i = 0; i < (size_t)size; i++) {
      for (size_t j = 0; j < (size_t)nRows; j++) {
         for (size_t k = 0; k < (size_t)nCols; k++) {
            A(i, j * nCols + k) = B[i](j, k);
         }
      }
   }
}

template <typename AReal>
void TReference<AReal>::Deflatten(std::vector<TMatrixT<AReal>> &A, const TMatrixT<AReal> &B, size_t size, size_t nRows,
                                  size_t nCols)
{
   for (size_t i = 0; i < (size_t)size; i++) {
      for (size_t j = 0; j < (size_t)nRows; j++) {
         for (size_t k = 0; k < (size_t)nCols; k++) {
            A[i](j, k) = B(i, j * nCols + k);
         }
      }
   }
}
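A round-trip sketch (sizes assumed for the example): flatten a batch of two 3x4 matrices into a 2x12 matrix and deflatten it back into the original layout:

   std::vector<TMatrixT<double>> batch(2, TMatrixT<double>(3, 4));
   TMatrixT<double> flat(2, 12);
   TReference<double>::Flatten(flat, batch, 2, 3, 4); // one row per batch element

   std::vector<TMatrixT<double>> restored(2, TMatrixT<double>(3, 4));
   TReference<double>::Deflatten(restored, flat, 2, 3, 4);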
template <typename AReal>
void TReference<AReal>::Rearrange(std::vector<TMatrixT<AReal>> &out, const std::vector<TMatrixT<AReal>> &in)
{
   // Rearrange the T x B x D input into the B x T x D output.
   size_t B = out.size();
   auto T = out[0].GetNrows();
   auto D = out[0].GetNcols();
   if ((T != (Int_t)in.size()) || (Int_t(B) != in[0].GetNrows()) || (D != in[0].GetNcols())) {
      std::cout << "Incompatible Dimensions\n"
                << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"
                << D << "\n";
      return;
   }
   for (size_t i = 0; i < B; ++i) {
      for (Int_t j = 0; j < T; ++j) {
         for (Int_t k = 0; k < D; ++k) {
            out[i](j, k) = in[j](i, k);
         }
      }
   }
}
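For example (sizes assumed), an input of T = 10 time steps, each a (batchSize = 4) x (D = 8) matrix, is rearranged into 4 output matrices of size 10 x 8, one full sequence per batch element.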
static void MultiplyTranspose(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &input, const TMatrixT< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the result into output.
static void Flatten(TMatrixT< AReal > &A, const std::vector< TMatrixT< AReal > > &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B, such that each matrix is stretched into one row, resulting in the matrix A.
static void MaxPoolLayerBackward(TMatrixT< AReal > &activationGradientsBackward, const TMatrixT< AReal > &activationGradients, const TMatrixT< AReal > &indexMatrix, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCol, size_t nLocalViews)
Perform the complete backward propagation step in a Max Pooling Layer.
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the bias vector row-wise to the matrix output.
static void Downsample(TMatrixT< AReal > &A, TMatrixT< AReal > &B, const TMatrixT< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using the max operation, such that the winning indices are stored in matrix B.
static void Backward(TMatrixT< Scalar_t > &activationGradientsBackward, TMatrixT< Scalar_t > &weightGradients, TMatrixT< Scalar_t > &biasGradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &activationGradients, const TMatrixT< Scalar_t > &weights, const TMatrixT< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
static void RotateWeights(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void Rearrange(std::vector< TMatrixT< AReal > > &out, const std::vector< TMatrixT< AReal > > &in)
Rearrange data according to time: fill the B x T x D output with the T x B x D input matrix.
static void Deflatten(std::vector< TMatrixT< AReal > > &A, const TMatrixT< Scalar_t > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B into a matrix and stores it in the tensor A.
static void Im2col(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B into local-view format, suitable for convolution, and store it in matrix A.
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements of matrix B, scaled by beta, to the elements of matrix A.
static void ConvLayerBackward(std::vector< TMatrixT< AReal > > &, TMatrixT< AReal > &, TMatrixT< AReal > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const std::vector< TMatrixT< AReal > > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Perform the complete backward propagation step in a Convolutional Layer.
static void AddConvBiases(TMatrixT< AReal > &output, const TMatrixT< AReal > &biases)
Add the biases in the Convolutional Layer.
static void Copy(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B)
static void Reshape(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
Transform the matrix B to a matrix A with different dimensions.
Int_t GetNoElements() const
void TMult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
Create a matrix C such that C = A' * B.
void Mult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
General matrix multiplication. Create a matrix C such that C = A * B.