Logo ROOT  
Reference Guide
Propagation.hxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$ // Author: Simon Pfreundschuh 10/07/16
2
3/*************************************************************************
4 * Copyright (C) 2016, Simon Pfreundschuh *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11/////////////////////////////////////////////////////////////////////
12// Implementation of the functions required for the forward and //
13// backward propagation of activations through a neural network in //
14// the reference implementation. //
15/////////////////////////////////////////////////////////////////////
16
18
19namespace TMVA {
20namespace DNN {
21
22template <typename AReal>
24 const TMatrixT<AReal> &weights)
25{
26 output.MultT(input, weights);
27}
28
29template <typename AReal>
31{
32 for (size_t i = 0; i < (size_t)output.GetNrows(); i++) {
33 for (size_t j = 0; j < (size_t)output.GetNcols(); j++) {
34 output(i, j) += biases(j, 0);
35 }
36 }
37}
38
39template <typename AReal>
40void TReference<AReal>::Backward(TMatrixT<AReal> &activation_gradients_backward, TMatrixT<AReal> &weight_gradients,
41 TMatrixT<AReal> &bias_gradients, TMatrixT<AReal> &df,
42 const TMatrixT<AReal> &activation_gradients, const TMatrixT<AReal> &weights,
43 const TMatrixT<AReal> &activations_backward)
44{
45
46 // Compute element-wise product.
47 for (size_t i = 0; i < (size_t)df.GetNrows(); i++) {
48 for (size_t j = 0; j < (size_t)df.GetNcols(); j++) {
49 df(i, j) *= activation_gradients(i, j);
50 }
51 }
52
53 // Activation gradients.
54 if (activation_gradients_backward.GetNoElements() > 0) {
55 activation_gradients_backward.Mult(df, weights);
56 }
57
58 // Weights gradients.
59 if (weight_gradients.GetNoElements() > 0) {
60 weight_gradients.TMult(df, activations_backward);
61 }
62
63 // Bias gradients.
64 if (bias_gradients.GetNoElements() > 0) {
65 for (size_t j = 0; j < (size_t)df.GetNcols(); j++) {
66 AReal sum = 0.0;
67 for (size_t i = 0; i < (size_t)df.GetNrows(); i++) {
68 sum += df(i, j);
69 }
70 bias_gradients(j, 0) = sum;
71 }
72 }
73}
74
75template <typename AReal>
77{
78 for (size_t i = 0; i < (size_t)A.GetNrows(); i++) {
79 for (size_t j = 0; j < (size_t)A.GetNcols(); j++) {
80 A(i, j) += beta * B(i, j);
81 }
82 }
83}
84
85template <typename AReal>
87{
88 A = B;
89}
90
91template <typename AReal>
92void TReference<AReal>::ScaleAdd(std::vector<TMatrixT<AReal>> &A, const std::vector<TMatrixT<AReal>> &B, AReal beta)
93{
94 for (size_t i = 0; i < A.size(); ++i) {
95 ScaleAdd(A[i], B[i], beta);
96 }
97}
98
99template <typename AReal>
100void TReference<AReal>::Copy(std::vector<TMatrixT<AReal>> &A, const std::vector<TMatrixT<AReal>> &B)
101{
102 for (size_t i = 0; i < A.size(); ++i) {
103 Copy(A[i], B[i]);
104 }
105}
106
107//______________________________________________________________________________
108template <typename AReal>
109void TReference<AReal>::Im2col(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, size_t imgHeight, size_t imgWidth,
110 size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
111 size_t zeroPaddingHeight, size_t zeroPaddingWidth)
112{
113 // image boudaries
114 int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
115 int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
116 size_t currLocalView = 0;
117
118 // convolution centers
119 for (int i = -1*int(zeroPaddingHeight) + fltHeight / 2; i <= imgHeightBound; i += strideRows) {
120 for (int j = -1*int(zeroPaddingWidth) + fltWidth / 2; j <= imgWidthBound; j += strideCols) {
121 size_t currLocalViewPixel = 0;
122
123 // within the local view
124 for (int m = 0; m < B.GetNrows(); m++) {
125 for (Int_t k = i - Int_t(fltHeight) / 2; k <= i + (Int_t(fltHeight) - 1) / 2; k++) {
126 for (Int_t l = j - Int_t(fltWidth) / 2; l <= j + (Int_t(fltWidth) - 1) / 2; l++) {
127
128 // Check the boundaries
129 if (k < 0 || k >= Int_t(imgHeight) || l < 0 || l >= Int_t(imgWidth))
130 A(currLocalView, currLocalViewPixel++) = 0;
131 else
132 A(currLocalView, currLocalViewPixel++) = B(m, k * imgWidth + l);
133 }
134 }
135 }
136
137 currLocalView++;
138 }
139 }
140}
141
142//______________________________________________________________________________
143template <typename AReal>
145 size_t filterHeight, size_t filterWidth, size_t numFilters)
146{
147 size_t jump = filterHeight * filterWidth;
148 for (size_t j = 0; j < filterDepth; j++) {
149 for (size_t k = 0; k < numFilters; k++) {
150 for (size_t i = 0; i < jump; i++) {
151 A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
152 }
153 }
154 }
155}
156
157//______________________________________________________________________________
158template <typename AReal>
160{
161 for (size_t i = 0; i < (size_t)output.GetNrows(); i++) {
162 for (size_t j = 0; j < (size_t)output.GetNcols(); j++) {
163 output(i, j) += biases(i, 0);
164 }
165 }
166}
167
168#ifdef HAVE_CNN_REFERENCE
169//______________________________________________________________________________
170template <typename AReal>
171void TReference<AReal>::ConvLayerBackward(std::vector<TMatrixT<AReal>> &activation_gradients_backward,
172 TMatrixT<AReal> &weight_gradients, TMatrixT<AReal> &bias_gradients,
173 std::vector<TMatrixT<AReal>> &df,
174 const std::vector<TMatrixT<AReal>> &activation_gradients,
175 const TMatrixT<AReal> &weights,
176 const std::vector<TMatrixT<AReal>> &activations_backward, size_t batchSize,
177 size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
178 size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth,
179 size_t nLocalViews)
180{
181
182 // Update derivatives
183 size_t m, n;
184 m = activation_gradients[0].GetNrows();
185 n = activation_gradients[0].GetNcols();
186
187 for (size_t i = 0; i < batchSize; i++) {
188 for (size_t j = 0; j < (size_t)m; j++) {
189 for (size_t k = 0; k < (size_t)n; k++) {
190 df[i](j, k) *= activation_gradients[i](j, k);
191 }
192 }
193 }
194
195 // Calculate the activation gradients of the previous layer
196 CalculateConvActivationGradients(activation_gradients_backward, df, weights, batchSize, inputHeight, inputWidth,
197 depth, height, width, filterDepth, filterHeight, filterWidth);
198
199 // Calculate the weight gradients
200 CalculateConvWeightGradients(weight_gradients, df, activations_backward, batchSize, inputHeight, inputWidth, depth,
201 height, width, filterDepth, filterHeight, filterWidth, nLocalViews);
202
203 // Calculate the bias gradients
204 CalculateConvBiasGradients(bias_gradients, df, batchSize, depth, nLocalViews);
205}
206
207//______________________________________________________________________________
208template <typename AReal>
209void TReference<AReal>::CalculateConvActivationGradients(std::vector<TMatrixT<AReal>> &activation_gradients_backward,
210 const std::vector<TMatrixT<AReal>> &df,
211 const TMatrixT<AReal> &weights, size_t batchSize,
212 size_t inputHeight, size_t inputWidth, size_t depth,
213 size_t height, size_t width, size_t filterDepth,
214 size_t filterHeight, size_t filterWidth)
215{
216
217 if (activation_gradients_backward.size() == 0) return;
218 // need to implement
219 // Transform the weights
220 TMatrixT<AReal> rotWeights(filterDepth, depth * filterHeight * filterWidth);
221 RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());
222
223 // Calculate the zero paddings
224 size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
225 size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));
226
227 // Calculate the number of local views and the number of pixles in each view
228 size_t tempNLocalViews = inputHeight * inputWidth;
229 size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
230
231 size_t tempStrideRows = 1;
232 size_t tempStrideCols = 1;
233
234 // An entire convolution follows
235 for (size_t i = 0; i < batchSize; i++) {
236 TMatrixT<AReal> dfTr(tempNLocalViews, tempNLocalViewPixels);
237 Im2col(dfTr, df[i], inputHeight, inputWidth, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
238 tempZeroPaddingHeight, tempZeroPaddingWidth);
239
240 activation_gradients_backward[i].MultT(rotWeights, dfTr);
241 }
242
243 return ;
244}
245
246//______________________________________________________________________________
247template <typename AReal>
248void TReference<AReal>::CalculateConvWeightGradients(TMatrixT<AReal> &weight_gradients,
249 const std::vector<TMatrixT<AReal>> &df,
250 const std::vector<TMatrixT<AReal>> &activations_backward,
251 size_t batchSize, size_t inputHeight, size_t inputWidth,
252 size_t depth, size_t height, size_t width, size_t filterDepth,
253 size_t filterHeight, size_t filterWidth, size_t nLocalViews)
254{
255
256 // reinitialize the weight gradients to 0
257 for (Int_t i = 0; i < weight_gradients.GetNrows(); i++) {
258 for (Int_t j = 0; j < weight_gradients.GetNcols(); j++) {
259 weight_gradients(i, j) = 0;
260 }
261 }
262 for (size_t i = 0; i < batchSize; i++) {
263 // Calculate the zero paddings
264 size_t tempZeroPaddingHeight = (filterHeight - height + inputHeight - 1) / 2;
265 size_t tempZeroPaddingWidth = (filterWidth - width + inputWidth - 1) / 2;
266
267 size_t tempNLocalViews = filterHeight * filterWidth;
268 size_t tempNLocalViewPixels = inputHeight * inputWidth;
269
270 size_t tempStrideRows = 1;
271 size_t tempStrideCols = 1;
272
273 for (size_t j = 0; j < depth; j++) {
274
275 // row matrix
276 TMatrixT<AReal> rowDelta(1, nLocalViews);
277 for (size_t k = 0; k < nLocalViews; k++) {
278 rowDelta(0, k) = df[i](j, k);
279 }
280
281 // convolution
282 TMatrixT<AReal> res(filterDepth, filterHeight * filterWidth);
283
284 TMatrixT<AReal> rowDeltaTr(tempNLocalViews, tempNLocalViewPixels);
285 Im2col(rowDeltaTr, rowDelta, height, width, inputHeight, inputWidth, tempStrideRows, tempStrideCols,
286 tempZeroPaddingHeight, tempZeroPaddingWidth);
287
288 res.MultT(activations_backward[i], rowDeltaTr);
289
290 for (size_t k = 0; k < filterDepth; k++) {
291 for (size_t l = 0; l < filterHeight * filterWidth; l++) {
292 weight_gradients(j, k * (filterHeight * filterWidth) + l) += res(k, (tempNLocalViews - 1) - l);
293 }
294 }
295 }
296 }
297#if 0
298 // to remove warning
299 (void)weight_gradients;
300 (void)df;
301 (void)activations_backward;
302 (void) batchSize;
303 (void) inputHeight;
304 (void)inputWidth;
305 (void)depth;
306 (void)height;
307 (void) width;
308 (void)filterDepth;
309 (void)filterHeight;
310 (void)filterWidth;
311 (void)nLocalViews;
312#endif
313}
314
315//______________________________________________________________________________
316template <typename AReal>
317void TReference<AReal>::CalculateConvBiasGradients(TMatrixT<AReal> &bias_gradients, const std::vector<TMatrixT<AReal>> &df,
318 size_t batchSize, size_t depth, size_t nLocalViews)
319{
320 for (size_t i = 0; i < depth; i++) {
321 AReal sum = 0;
322 for (size_t j = 0; j < nLocalViews; j++) {
323 for (size_t k = 0; k < batchSize; k++) {
324 sum += df[k](i, j);
325 }
326 }
327 bias_gradients(i, 0) = sum;
328 }
329}
330#endif
331
332//______________________________________________________________________________
333template <typename AReal>
335 size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,
336 size_t strideCols)
337{
338 // image boudaries
339 int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
340 int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
341 size_t currLocalView = 0;
342
343 // centers
344 for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
345 for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
346 // within local views
347 for (int m = 0; m < C.GetNrows(); m++) {
348 AReal value = -std::numeric_limits<AReal>::max();
349
350 for (int k = i - Int_t(fltHeight) / 2; k <= i + (Int_t(fltHeight) - 1) / 2; k++) {
351 for (int l = j - Int_t(fltWidth) / 2; l <= j + (Int_t(fltWidth) - 1) / 2; l++) {
352 if (C(m, k * imgWidth + l) > value) {
353 value = C(m, k * imgWidth + l);
354 B(m, currLocalView) = k * imgWidth + l;
355 }
356 }
357 }
358 A(m, currLocalView) = value;
359 }
360 currLocalView++;
361 }
362 }
363}
364
365//______________________________________________________________________________
366template <typename AReal>
368 const TMatrixT<AReal> &activationGradients,
369 const TMatrixT<AReal> &indexMatrix,
370 size_t /* imgHeight */, size_t /* imgWidth */, size_t /* fltHeight */,
371 size_t /* fltWidth */, size_t /* strideRows */, size_t /* strideCols */,
372 size_t nLocalViews)
373{
374 size_t depth = activationGradientsBackward.GetNrows();
375
376 for (size_t j = 0; j < depth; j++) {
377 // initialize to zeros
378 for (size_t t = 0; t < (size_t)activationGradientsBackward.GetNcols(); t++) {
379 activationGradientsBackward[j][t] = 0;
380 }
381
382 // set values
383 for (size_t k = 0; k < nLocalViews; k++) {
384 AReal grad = activationGradients[j][k];
385 size_t winningIdx = indexMatrix[j][k];
386 activationGradientsBackward[j][winningIdx] += grad;
387 }
388 }
389}
390
391//______________________________________________________________________________
392template <typename AReal>
394{
395 auto nColsA = A.GetNcols();
396 auto nColsB = B.GetNcols();
397
398 for (Int_t i = 0; i < A.GetNrows(); i++) {
399 for (Int_t j = 0; j < A.GetNcols(); j++) {
400 auto nElem = i * nColsA + j;
401 A(i, j) = B(nElem / nColsB, nElem % nColsB);
402 }
403 }
404}
405
406//______________________________________________________________________________
407template <typename AReal>
408void TReference<AReal>::Flatten(TMatrixT<AReal> &A, const std::vector<TMatrixT<AReal>> &B, size_t size, size_t nRows,
409 size_t nCols)
410{
411 for (size_t i = 0; i < (size_t)size; i++) {
412 for (size_t j = 0; j < (size_t)nRows; j++) {
413 for (size_t k = 0; k < (size_t)nCols; k++) {
414 A(i, j * nCols + k) = B[i](j, k);
415 }
416 }
417 }
418}
419
420//______________________________________________________________________________
421template <typename AReal>
422void TReference<AReal>::Deflatten(std::vector<TMatrixT<AReal>> &A, const TMatrixT<AReal> &B, size_t size, size_t nRows,
423 size_t nCols)
424{
425 for (size_t i = 0; i < (size_t)size; i++) {
426 for (size_t j = 0; j < (size_t)nRows; j++) {
427 for (size_t k = 0; k < (size_t)nCols; k++) {
428 A[i](j, k) = B(i, j * nCols + k);
429 }
430 }
431 }
432}
433
434//______________________________________________________________________________
435template <typename AReal>
436void TReference<AReal>::Rearrange(std::vector<TMatrixT<AReal>> &out, const std::vector<TMatrixT<AReal>> &in)
437{
438 // B x T x D out --- T x B x D in*/
439 auto B = out.size();
440 auto T = out[0].GetNrows();
441 auto D = out[0].GetNcols();
442 if ((T != (Int_t)in.size()) || (Int_t(B) != in[0].GetNrows()) || (D != in[0].GetNcols())) {
443 std::cout << "Incompatible Dimensions\n"
444 << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"
445 << D << "\n";
446 return;
447 }
448 for (size_t i = 0; i < B; ++i) {
449 for (Int_t j = 0; j < T; ++j) {
450 for (Int_t k = 0; k < D; ++k) {
451 out[i](j, k) = in[j](i, k);
452 }
453 }
454 }
455 return;
456}
457
458} // namespace DNN
459} // namespace TMVA
int Int_t
Definition: RtypesCore.h:43
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
double floor(double)
typedef void((*Func_t)())
static void MultiplyTranspose(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &input, const TMatrixT< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
Definition: Propagation.hxx:23
static void Flatten(TMatrixT< AReal > &A, const std::vector< TMatrixT< AReal > > &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B such that each matrix is stretched into one row, resulting in the matrix A.
static void MaxPoolLayerBackward(TMatrixT< AReal > &activationGradientsBackward, const TMatrixT< AReal > &activationGradients, const TMatrixT< AReal > &indexMatrix, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCol, size_t nLocalViews)
Perform the complete backward propagation step in a Max Pooling Layer.
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.hxx:30
static void Downsample(TMatrixT< AReal > &A, TMatrixT< AReal > &B, const TMatrixT< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void Backward(TMatrixT< Scalar_t > &activationGradientsBackward, TMatrixT< Scalar_t > &weightGradients, TMatrixT< Scalar_t > &biasGradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &activationGradients, const TMatrixT< Scalar_t > &weights, const TMatrixT< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
Definition: Propagation.hxx:40
static void RotateWeights(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void Rearrange(std::vector< TMatrixT< AReal > > &out, const std::vector< TMatrixT< AReal > > &in)
Rearrange data according to time: fill the B x T x D tensor out from the T x B x D tensor in.
static void Deflatten(std::vector< TMatrixT< AReal > > &A, const TMatrixT< Scalar_t > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor A.
static void Im2col(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements of matrix B, scaled by beta, to the elements of matrix A.
Definition: Propagation.hxx:76
static void ConvLayerBackward(std::vector< TMatrixT< AReal > > &, TMatrixT< AReal > &, TMatrixT< AReal > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const std::vector< TMatrixT< AReal > > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Perform the complete backward propagation step in a Convolutional Layer.
Definition: Reference.h:459
static void AddConvBiases(TMatrixT< AReal > &output, const TMatrixT< AReal > &biases)
Add the biases in the Convolutional Layer.
static void Copy(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B)
Definition: Propagation.hxx:86
static void Reshape(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
Int_t GetNrows() const
Definition: TMatrixTBase.h:124
Int_t GetNoElements() const
Definition: TMatrixTBase.h:128
Int_t GetNcols() const
Definition: TMatrixTBase.h:127
void TMult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
Create a matrix C such that C = A' * B.
Definition: TMatrixT.cxx:855
void Mult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
General matrix multiplication. Create a matrix C such that C = A * B.
Definition: TMatrixT.cxx:651
double beta(double x, double y)
Calculates the beta function.
const Int_t n
Definition: legend1.C:16
static double B[]
static double A[]
static double C[]
double T(double x)
Definition: ChebyshevPol.h:34
void Copy(void *source, void *dest)
create variable transformations
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
static long int sum(long int i)
Definition: Factory.cxx:2275
static void output(int code)
Definition: gifencode.c:226