Logo ROOT   6.14/05
Reference Guide
Propagation.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$ // Author: Simon Pfreundschuh 10/07/16
2 
3 /*************************************************************************
4  * Copyright (C) 2016, Simon Pfreundschuh *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 /////////////////////////////////////////////////////////////////////
12 // Implementation of the functions required for the forward and //
13 // backward propagation of activations through a neural network in //
14 // the reference implementation. //
15 /////////////////////////////////////////////////////////////////////
16 
18 
19 namespace TMVA {
20 namespace DNN {
21 
22 template <typename AReal>
24  const TMatrixT<AReal> &weights)
25 {
26  output.MultT(input, weights);
27 }
28 
29 template <typename AReal>
31 {
32  for (size_t i = 0; i < (size_t)output.GetNrows(); i++) {
33  for (size_t j = 0; j < (size_t)output.GetNcols(); j++) {
34  output(i, j) += biases(j, 0);
35  }
36  }
37 }
38 
39 template <typename AReal>
40 void TReference<AReal>::Backward(TMatrixT<AReal> &activation_gradients_backward, TMatrixT<AReal> &weight_gradients,
41  TMatrixT<AReal> &bias_gradients, TMatrixT<AReal> &df,
42  const TMatrixT<AReal> &activation_gradients, const TMatrixT<AReal> &weights,
43  const TMatrixT<AReal> &activations_backward)
44 {
45 
46  // Compute element-wise product.
47  for (size_t i = 0; i < (size_t)df.GetNrows(); i++) {
48  for (size_t j = 0; j < (size_t)df.GetNcols(); j++) {
49  df(i, j) *= activation_gradients(i, j);
50  }
51  }
52 
53  // Activation gradients.
54  if (activation_gradients_backward.GetNoElements() > 0) {
55  activation_gradients_backward.Mult(df, weights);
56  }
57 
58  // Weights gradients.
59  if (weight_gradients.GetNoElements() > 0) {
60  weight_gradients.TMult(df, activations_backward);
61  }
62 
63  // Bias gradients.
64  if (bias_gradients.GetNoElements() > 0) {
65  for (size_t j = 0; j < (size_t)df.GetNcols(); j++) {
66  AReal sum = 0.0;
67  for (size_t i = 0; i < (size_t)df.GetNrows(); i++) {
68  sum += df(i, j);
69  }
70  bias_gradients(j, 0) = sum;
71  }
72  }
73 }
74 
75 template <typename AReal>
77 {
78  for (size_t i = 0; i < (size_t)A.GetNrows(); i++) {
79  for (size_t j = 0; j < (size_t)A.GetNcols(); j++) {
80  A(i, j) += beta * B(i, j);
81  }
82  }
83 }
84 
85 template <typename AReal>
87 {
88  A = B;
89 }
90 
91 template <typename AReal>
92 void TReference<AReal>::ScaleAdd(std::vector<TMatrixT<AReal>> &A, const std::vector<TMatrixT<AReal>> &B, AReal beta)
93 {
94  for (size_t i = 0; i < A.size(); ++i) {
95  ScaleAdd(A[i], B[i], beta);
96  }
97 }
98 
99 template <typename AReal>
100 void TReference<AReal>::Copy(std::vector<TMatrixT<AReal>> &A, const std::vector<TMatrixT<AReal>> &B)
101 {
102  for (size_t i = 0; i < A.size(); ++i) {
103  Copy(A[i], B[i]);
104  }
105 }
106 
107 template <typename AReal>
109 {
110  B = 0.0;
111  for (Int_t i = 0; i < A.GetNrows(); i++) {
112  for (Int_t j = 0; j < A.GetNcols(); j++) {
113  B(0, j) += A(i, j);
114  }
115  }
116 }
117 
118 //______________________________________________________________________________
119 template <typename AReal>
120 void TReference<AReal>::Im2col(TMatrixT<AReal> &A, TMatrixT<AReal> &B, size_t imgHeight, size_t imgWidth,
121  size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
122  size_t zeroPaddingHeight, size_t zeroPaddingWidth)
123 {
124  // image boudaries
125  int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
126  int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
127  size_t currLocalView = 0;
128 
129  // convolution centers
130  for (int i = -zeroPaddingHeight + fltHeight / 2; i <= imgHeightBound; i += strideRows) {
131  for (int j = -zeroPaddingWidth + fltWidth / 2; j <= imgWidthBound; j += strideCols) {
132  size_t currLocalViewPixel = 0;
133 
134  // within the local view
135  for (int m = 0; m < B.GetNrows(); m++) {
136  for (Int_t k = i - Int_t(fltHeight) / 2; k <= i + (Int_t(fltHeight) - 1) / 2; k++) {
137  for (Int_t l = j - Int_t(fltWidth) / 2; l <= j + (Int_t(fltWidth) - 1) / 2; l++) {
138 
139  // Check the boundaries
140  if (k < 0 || k >= Int_t(imgHeight) || l < 0 || l >= Int_t(imgWidth))
141  A(currLocalView, currLocalViewPixel++) = 0;
142  else
143  A(currLocalView, currLocalViewPixel++) = B(m, k * imgWidth + l);
144  }
145  }
146  }
147 
148  currLocalView++;
149  }
150  }
151 }
152 
153 //______________________________________________________________________________
154 template <typename AReal>
156  size_t filterHeight, size_t filterWidth, size_t numFilters)
157 {
158  size_t jump = filterHeight * filterWidth;
159  for (size_t j = 0; j < filterDepth; j++) {
160  for (size_t k = 0; k < numFilters; k++) {
161  for (size_t i = 0; i < jump; i++) {
162  A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
163  }
164  }
165  }
166 }
167 
168 //______________________________________________________________________________
169 template <typename AReal>
171 {
172  for (size_t i = 0; i < (size_t)output.GetNrows(); i++) {
173  for (size_t j = 0; j < (size_t)output.GetNcols(); j++) {
174  output(i, j) += biases(i, 0);
175  }
176  }
177 }
178 
179 #ifdef HAVE_CNN_REFERENCE
180 //______________________________________________________________________________
181 template <typename AReal>
182 void TReference<AReal>::ConvLayerBackward(std::vector<TMatrixT<AReal>> &activation_gradients_backward,
183  TMatrixT<AReal> &weight_gradients, TMatrixT<AReal> &bias_gradients,
184  std::vector<TMatrixT<AReal>> &df,
185  const std::vector<TMatrixT<AReal>> &activation_gradients,
186  const TMatrixT<AReal> &weights,
187  const std::vector<TMatrixT<AReal>> &activations_backward, size_t batchSize,
188  size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
189  size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth,
190  size_t nLocalViews)
191 {
192 
193  // Update derivatives
194  size_t m, n;
195  m = activation_gradients[0].GetNrows();
196  n = activation_gradients[0].GetNcols();
197 
198  for (size_t i = 0; i < batchSize; i++) {
199  for (size_t j = 0; j < (size_t)m; j++) {
200  for (size_t k = 0; k < (size_t)n; k++) {
201  df[i](j, k) *= activation_gradients[i](j, k);
202  }
203  }
204  }
205 
206  // Calculate the activation gradients of the previous layer
207  CalculateConvActivationGradients(activation_gradients_backward, df, weights, batchSize, inputHeight, inputWidth,
208  depth, height, width, filterDepth, filterHeight, filterWidth);
209 
210  // Calculate the weight gradients
211  CalculateConvWeightGradients(weight_gradients, df, activations_backward, batchSize, inputHeight, inputWidth, depth,
212  height, width, filterDepth, filterHeight, filterWidth, nLocalViews);
213 
214  // Calculate the bias gradients
215  CalculateConvBiasGradients(bias_gradients, df, batchSize, depth, nLocalViews);
216 }
217 
218 //______________________________________________________________________________
219 template <typename AReal>
220 void TReference<AReal>::CalculateConvActivationGradients(std::vector<TMatrixT<AReal>> &activation_gradients_backward,
221  const std::vector<TMatrixT<AReal>> &df,
222  const TMatrixT<AReal> &weights, size_t batchSize,
223  size_t inputHeight, size_t inputWidth, size_t depth,
224  size_t height, size_t width, size_t filterDepth,
225  size_t filterHeight, size_t filterWidth)
226 {
227 
228  if (activation_gradients_backward.size() == 0) return;
229  // need to implement
230  // Transform the weights
231  TMatrixT<AReal> rotWeights(filterDepth, depth * filterHeight * filterWidth);
232  RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());
233 
234  // Calculate the zero paddings
235  size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
236  size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));
237 
238  // Calculate the number of local views and the number of pixles in each view
239  size_t tempNLocalViews = inputHeight * inputWidth;
240  size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
241 
242  size_t tempStrideRows = 1;
243  size_t tempStrideCols = 1;
244 
245  // An entire convolution follows
246  for (size_t i = 0; i < batchSize; i++) {
247  TMatrixT<AReal> dfTr(tempNLocalViews, tempNLocalViewPixels);
248  Im2col(dfTr, df[i], inputHeight, inputWidth, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
249  tempZeroPaddingHeight, tempZeroPaddingWidth);
250 
251  activation_gradients_backward[i].MultT(rotWeights, dfTr);
252  }
253 
254  return ;
255 }
256 
257 //______________________________________________________________________________
258 template <typename AReal>
260  const std::vector<TMatrixT<AReal>> &df,
261  const std::vector<TMatrixT<AReal>> &activations_backward,
262  size_t batchSize, size_t inputHeight, size_t inputWidth,
263  size_t depth, size_t height, size_t width, size_t filterDepth,
264  size_t filterHeight, size_t filterWidth, size_t nLocalViews)
265 {
266 
267  // reinitialize the weight gradients to 0
268  for (Int_t i = 0; i < weight_gradients.GetNrows(); i++) {
269  for (Int_t j = 0; j < weight_gradients.GetNcols(); j++) {
270  weight_gradients(i, j) = 0;
271  }
272  }
273  for (size_t i = 0; i < batchSize; i++) {
274  // Calculate the zero paddings
275  size_t tempZeroPaddingHeight = (filterHeight - height + inputHeight - 1) / 2;
276  size_t tempZeroPaddingWidth = (filterWidth - width + inputWidth - 1) / 2;
277 
278  size_t tempNLocalViews = filterHeight * filterWidth;
279  size_t tempNLocalViewPixels = inputHeight * inputWidth;
280 
281  size_t tempStrideRows = 1;
282  size_t tempStrideCols = 1;
283 
284  for (size_t j = 0; j < depth; j++) {
285 
286  // row matrix
287  TMatrixT<AReal> rowDelta(1, nLocalViews);
288  for (size_t k = 0; k < nLocalViews; k++) {
289  rowDelta(0, k) = df[i](j, k);
290  }
291 
292  // convolution
293  TMatrixT<AReal> res(filterDepth, filterHeight * filterWidth);
294 
295  TMatrixT<AReal> rowDeltaTr(tempNLocalViews, tempNLocalViewPixels);
296  Im2col(rowDeltaTr, rowDelta, height, width, inputHeight, inputWidth, tempStrideRows, tempStrideCols,
297  tempZeroPaddingHeight, tempZeroPaddingWidth);
298 
299  res.MultT(activations_backward[i], rowDeltaTr);
300 
301  for (size_t k = 0; k < filterDepth; k++) {
302  for (size_t l = 0; l < filterHeight * filterWidth; l++) {
303  weight_gradients(j, k * (filterHeight * filterWidth) + l) += res(k, (tempNLocalViews - 1) - l);
304  }
305  }
306  }
307  }
308 #if 0
309  // to remove warning
310  (void)weight_gradients;
311  (void)df;
312  (void)activations_backward;
313  (void) batchSize;
314  (void) inputHeight;
315  (void)inputWidth;
316  (void)depth;
317  (void)height;
318  (void) width;
319  (void)filterDepth;
320  (void)filterHeight;
321  (void)filterWidth;
322  (void)nLocalViews;
323 #endif
324 }
325 
326 //______________________________________________________________________________
327 template <typename AReal>
328 void TReference<AReal>::CalculateConvBiasGradients(TMatrixT<AReal> &bias_gradients, const std::vector<TMatrixT<AReal>> &df,
329  size_t batchSize, size_t depth, size_t nLocalViews)
330 {
331  for (size_t i = 0; i < depth; i++) {
332  AReal sum = 0;
333  for (size_t j = 0; j < nLocalViews; j++) {
334  for (size_t k = 0; k < batchSize; k++) {
335  sum += df[k](i, j);
336  }
337  }
338  bias_gradients(i, 0) = sum;
339  }
340 }
341 #endif
342 
343 //______________________________________________________________________________
344 template <typename AReal>
346  size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,
347  size_t strideCols)
348 {
349  // image boudaries
350  int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
351  int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
352  size_t currLocalView = 0;
353 
354  // centers
355  for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
356  for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
357  // within local views
358  for (int m = 0; m < C.GetNrows(); m++) {
359  AReal value = -std::numeric_limits<AReal>::max();
360 
361  for (int k = i - Int_t(fltHeight) / 2; k <= i + (Int_t(fltHeight) - 1) / 2; k++) {
362  for (int l = j - Int_t(fltWidth) / 2; l <= j + (Int_t(fltWidth) - 1) / 2; l++) {
363  if (C(m, k * imgWidth + l) > value) {
364  value = C(m, k * imgWidth + l);
365  B(m, currLocalView) = k * imgWidth + l;
366  }
367  }
368  }
369  A(m, currLocalView) = value;
370  }
371  currLocalView++;
372  }
373  }
374 }
375 
376 //______________________________________________________________________________
377 template <typename AReal>
378 void TReference<AReal>::MaxPoolLayerBackward(std::vector<TMatrixT<AReal>> &activationGradientsBackward,
379  const std::vector<TMatrixT<AReal>> &activationGradients,
380  const std::vector<TMatrixT<AReal>> &indexMatrix, size_t batchSize,
381  size_t depth, size_t nLocalViews)
382 {
383  for (size_t i = 0; i < batchSize; i++) {
384  for (size_t j = 0; j < depth; j++) {
385 
386  // initialize to zeros
387  for (size_t t = 0; t < (size_t)activationGradientsBackward[i].GetNcols(); t++) {
388  activationGradientsBackward[i][j][t] = 0;
389  }
390 
391  // set values
392  for (size_t k = 0; k < nLocalViews; k++) {
393  AReal grad = activationGradients[i][j][k];
394  size_t winningIdx = indexMatrix[i][j][k];
395  activationGradientsBackward[i][j][winningIdx] = grad;
396  }
397  }
398  }
399 }
400 
401 //______________________________________________________________________________
402 template <typename AReal>
404 {
405  auto nColsA = A.GetNcols();
406  auto nColsB = B.GetNcols();
407 
408  for (Int_t i = 0; i < A.GetNrows(); i++) {
409  for (Int_t j = 0; j < A.GetNcols(); j++) {
410  auto nElem = i * nColsA + j;
411  A(i, j) = B(nElem / nColsB, (nElem - 1) % nColsB);
412  }
413  }
414 }
415 
416 //______________________________________________________________________________
417 template <typename AReal>
418 void TReference<AReal>::Flatten(TMatrixT<AReal> &A, const std::vector<TMatrixT<AReal>> &B, size_t size, size_t nRows,
419  size_t nCols)
420 {
421  for (size_t i = 0; i < (size_t)size; i++) {
422  for (size_t j = 0; j < (size_t)nRows; j++) {
423  for (size_t k = 0; k < (size_t)nCols; k++) {
424  A(i, j * nCols + k) = B[i](j, k);
425  }
426  }
427  }
428 }
429 
430 //______________________________________________________________________________
431 template <typename AReal>
432 void TReference<AReal>::Deflatten(std::vector<TMatrixT<AReal>> &A, const TMatrixT<AReal> &B, size_t size, size_t nRows,
433  size_t nCols)
434 {
435  for (size_t i = 0; i < (size_t)size; i++) {
436  for (size_t j = 0; j < (size_t)nRows; j++) {
437  for (size_t k = 0; k < (size_t)nCols; k++) {
438  A[i](j, k) = B(i, j * nCols + k);
439  }
440  }
441  }
442 }
443 
444 //______________________________________________________________________________
445 template <typename AReal>
446 void TReference<AReal>::Rearrange(std::vector<TMatrixT<AReal>> &out, const std::vector<TMatrixT<AReal>> &in)
447 {
448  // B x T x D out --- T x B x D in*/
449  auto B = out.size();
450  auto T = out[0].GetNrows();
451  auto D = out[0].GetNcols();
452  if ((T != (Int_t)in.size()) || (Int_t(B) != in[0].GetNrows()) || (D != in[0].GetNcols())) {
453  std::cout << "Incompatible Dimensions\n"
454  << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"
455  << D << "\n";
456  return;
457  }
458  for (size_t i = 0; i < B; ++i) {
459  for (Int_t j = 0; j < T; ++j) {
460  for (Int_t k = 0; k < D; ++k) {
461  out[i](j, k) = in[j](i, k);
462  }
463  }
464  }
465  return;
466 }
467 
468 } // namespace DNN
469 } // namespace TMVA
static double B[]
static long int sum(long int i)
Definition: Factory.cxx:2258
static void MaxPoolLayerBackward(std::vector< TMatrixT< AReal >> &activationGradientsBackward, const std::vector< TMatrixT< AReal >> &activationGradients, const std::vector< TMatrixT< AReal >> &indexMatrix, size_t batchSize, size_t depth, size_t nLocalViews)
Perform the complete backward propagation step in a Max Pooling Layer.
auto * m
Definition: textangle.C:8
static void AddConvBiases(TMatrixT< AReal > &output, const TMatrixT< AReal > &biases)
Add the biases in the Convolutional Layer.
static void Im2col(TMatrixT< AReal > &A, TMatrixT< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A...
double T(double x)
Definition: ChebyshevPol.h:34
image html pict1_TGaxis_012 png width
Define new text attributes for the label number "labNum".
Definition: TGaxis.cxx:2551
Int_t GetNcols() const
Definition: TMatrixTBase.h:125
int Int_t
Definition: RtypesCore.h:41
static double A[]
double beta(double x, double y)
Calculates the beta function.
Int_t GetNoElements() const
Definition: TMatrixTBase.h:126
The reference architecture class.
Definition: DataLoader.h:30
static void Flatten(TMatrixT< AReal > &A, const std::vector< TMatrixT< AReal >> &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A...
void MultT(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
General matrix multiplication. Create a matrix C such that C = A * B^T.
Definition: TMatrixT.cxx:951
static void Copy(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B)
Definition: Propagation.cxx:86
void TMult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
Create a matrix C such that C = A' * B.
Definition: TMatrixT.cxx:852
static void ConvLayerBackward(std::vector< TMatrixT< AReal >> &, TMatrixT< AReal > &, TMatrixT< AReal > &, std::vector< TMatrixT< AReal >> &, const std::vector< TMatrixT< AReal >> &, const TMatrixT< AReal > &, const std::vector< TMatrixT< AReal >> &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Perform the complete backward propagation step in a Convolutional Layer.
Definition: Reference.h:370
static void Reshape(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
static void RotateWeights(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A...
static void Backward(TMatrixT< Scalar_t > &activationGradientsBackward, TMatrixT< Scalar_t > &weightGradients, TMatrixT< Scalar_t > &biasGradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &activationGradients, const TMatrixT< Scalar_t > &weights, const TMatrixT< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
Definition: Propagation.cxx:40
static double C[]
static void Deflatten(std::vector< TMatrixT< AReal >> &A, const TMatrixT< Scalar_t > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor A.
double floor(double)
Int_t GetNrows() const
Definition: TMatrixTBase.h:122
static void SumColumns(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
Sum columns of the (m x n) matrix A and write the results into the first row of B.
void Copy(void *source, void *dest)
static void Downsample(TMatrixT< AReal > &A, TMatrixT< AReal > &B, const TMatrixT< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
Definition: Propagation.cxx:76
typedef void((*Func_t)())
Abstract ClassifierFactory template that handles arbitrary types.
auto * l
Definition: textangle.C:4
void Mult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
General matrix multiplication. Create a matrix C such that C = A * B.
Definition: TMatrixT.cxx:648
static void Rearrange(std::vector< TMatrixT< AReal >> &out, const std::vector< TMatrixT< AReal >> &in)
Rearrange data according to time: fill the B x T x D tensor out from the T x B x D tensor in.
static void MultiplyTranspose(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &input, const TMatrixT< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
Definition: Propagation.cxx:23
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.cxx:30
const Int_t n
Definition: legend1.C:16