Logo ROOT   6.16/01
Reference Guide
Propagation.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 10/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12//////////////////////////////////////////////////////////////////////
13// Implementation of the functions required for the forward and //
14// backward propagation of activations through a neural network for //
15// the reference implementation. //
16//////////////////////////////////////////////////////////////////////
17
20
21namespace TMVA {
22namespace DNN {
23
24template <typename AFloat>
26 const TCpuMatrix<AFloat> &Weights)
27{
28
29 int m = (int)input.GetNrows();
30 int k = (int)input.GetNcols();
31 int n = (int)Weights.GetNrows();
32
33 if ((int)output.GetNrows() != m) {
34 Error("MultiplyTranspose","Invalid input - output rows - input: %d != output : %d",m, (int) output.GetNrows());
35 R__ASSERT((int) output.GetNrows() == m);
36 }
37 if ((int)output.GetNcols() != n) {
38 Error("MultiplyTranspose","Invalid output cols or weight rows - output cols: %d != weight rows : %d",(int) output.GetNcols(),n);
39 R__ASSERT((int) output.GetNcols() == n);
40 }
41 if ((int)Weights.GetNcols() != k) {
42 Error("MultiplyTranspose","Invalid input cols or weight cols - input cols: %d != weight cols : %d", k, (int) Weights.GetNcols());
43 R__ASSERT((int) Weights.GetNcols() == k);
44 }
45
46 char transa = 'N';
47 char transb = 'T';
48
49 AFloat alpha = 1.0;
50 AFloat beta = 0.0;
51
52 const AFloat *A = input.GetRawDataPointer();
53 const AFloat *B = Weights.GetRawDataPointer();
54 AFloat *C = output.GetRawDataPointer();
55
56 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha, A, &m, B, &n, &beta, C, &m);
57}
58
59template <typename AFloat>
61{
62 int m = (int)output.GetNrows();
63 int n = (int)output.GetNcols();
64
65 int inc = 1.0;
66 AFloat alpha = 1.0;
67
68 AFloat *A = output.GetRawDataPointer();
69 const AFloat *x = TCpuMatrix<AFloat>::GetOnePointer();
70 const AFloat *y = biases.GetRawDataPointer();
71
73 R__ASSERT(n <= (int)(biases.GetNcols()*biases.GetNrows()));
74
75 ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, x, &inc, y, &inc, A, &m);
76}
77
78template <typename AFloat>
79void TCpu<AFloat>::Backward(TCpuMatrix<AFloat> &activationGradientsBackward, TCpuMatrix<AFloat> &weightGradients,
80 TCpuMatrix<AFloat> &biasGradients, TCpuMatrix<AFloat> &df,
81 const TCpuMatrix<AFloat> &activationGradients, const TCpuMatrix<AFloat> &weights,
82 const TCpuMatrix<AFloat> &activationsBackward)
83{
84 // Compute element-wise product.
85 Hadamard(df, activationGradients);
86
87 // Activation gradients.
88 if (activationGradientsBackward.GetNoElements() > 0) Multiply(activationGradientsBackward, df, weights);
89
90 // Weight gradients.
91 if (weightGradients.GetNoElements() > 0) TransposeMultiply(weightGradients, df, activationsBackward);
92
93 // Bias gradients.
94 if (biasGradients.GetNoElements() > 0) SumColumns(biasGradients, df);
95}
96
97//____________________________________________________________________________
98template <typename AFloat>
99void TCpu<AFloat>::Im2col(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, size_t imgHeight, size_t imgWidth,
100 size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
101 size_t zeroPaddingHeight, size_t zeroPaddingWidth)
102{
103
104 // image boudaries
105 int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
106 int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
107 size_t currLocalView = 0;
108
109 const int halfFltHeight = fltHeight / 2;
110 const int halfFltWidth = fltWidth / 2;
111 const int halfFltHeightM1 = (fltHeight - 1) / 2;
112 const int halfFltWidthM1 = (fltWidth - 1) / 2;
113 const int nRowsInput = B.GetNrows();
114 const int nColsInput = B.GetNcols();
115 const int nRowsOutput = A.GetNrows();
116 const int nColsOutput = A.GetNcols();
117
118 // convolution centers
119 for (int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
120 for (int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {
121 size_t currLocalViewPixel = 0;
122
123 // within the local view
124 R__ASSERT((int) currLocalView < nRowsOutput );
125
126 for (int m = 0; m < nRowsInput; m++) {
127 for (int k = i - halfFltHeight ; k <= Int_t(i + halfFltHeightM1 ); k++) {
128 int kstep = k * imgWidth;
129 for (int l = j - halfFltWidth ; l <= Int_t(j + halfFltWidthM1); l++) {
130
131 // Check the boundaries
132 R__ASSERT((int) currLocalViewPixel < nColsOutput );
133 //R__ASSERT(k * imgWidth + l < B.GetNcols());
134 if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
135 A(currLocalView, currLocalViewPixel++) = 0;
136 else
137 A(currLocalView, currLocalViewPixel++) = B(m, kstep + l);
138 }
139 }
140 }
141 //std::cout << " i " << i << " " << j << " increment currLocalView " << currLocalView << std::endl;
142 currLocalView++;
143 }
144 }
145 //TMVA_DNN_PrintTCpuMatrix(A,"FromIm2Col");
146}
147
148//____________________________________________________________________________
149template <typename AFloat>
150void TCpu<AFloat>::Im2colIndices(std::vector<int> &V, const TCpuMatrix<AFloat> &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth,
151 size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
152 size_t zeroPaddingHeight, size_t zeroPaddingWidth)
153{
154
155 // image boudaries
156 int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
157 int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
158 size_t currLocalView = 0;
159
160 const int halfFltHeight = fltHeight / 2;
161 const int halfFltWidth = fltWidth / 2;
162 const int halfFltHeightM1 = (fltHeight - 1) / 2;
163 const int halfFltWidthM1 = (fltWidth - 1) / 2;
164 const int nRowsInput = B.GetNrows();
165 const int nColsInput = B.GetNcols();
166 const size_t nSizeOutput = V.size();
167 const int npixels = nRowsInput * fltHeight * fltWidth;
168 // const int nRowsOutput = A.GetNrows();
169 // const int nColsOutput = A.GetNcols();
170
171 // convolution centers
172 for (int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
173 for (int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {
174 size_t currLocalViewPixel = 0;
175
176 // within the local view
177 //R__ASSERT((int) currLocalView < nRowsOutput );
178
179 for (int m = 0; m < nRowsInput; m++) {
180 for (int k = i - halfFltHeight ; k <= Int_t(i + halfFltHeightM1 ); k++) {
181 int kstep = k * imgWidth;
182 for (int l = j - halfFltWidth ; l <= Int_t(j + halfFltWidthM1); l++) {
183
184 // Check the boundaries
185 //R__ASSERT(currLocalViewPixel < nColsOutput );
186 R__ASSERT(currLocalView * npixels + currLocalViewPixel < nSizeOutput );
187 if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
188 //V[currLocalView * npixels + currLocalViewPixel]=-1;
189 V[currLocalViewPixel * nLocalViews + currLocalView] = -1;
190 else
191 V[currLocalViewPixel * nLocalViews + currLocalView]= ( kstep + l) * nRowsInput + m;
192
193 currLocalViewPixel++;
194 }
195 }
196 }
197 currLocalView++;
198 }
199 }
200}
201template <typename AFloat>
202void TCpu<AFloat>::Im2colFast(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, const std::vector<int> &V)
203{
204 size_t n = V.size();
205 R__ASSERT( n == A.GetNcols() * A.GetNrows() );
206 AFloat * a = A.GetRawDataPointer();
207 const AFloat * b = B.GetRawDataPointer();
208
209//#define DL_USE_MTE
210 // parallel execution
211#ifdef DL_USE_MTE
212 const size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(n);
213
214 auto f = [&](UInt_t workerID)
215 {
216 for (size_t j = 0; j < nsteps; ++j) {
217 size_t ii = workerID+j;
218 if (ii >= n) break;
219 int idx = V[ii];
220 if (idx >= 0) a[ii] = b[idx];
221 else a[ii] = 0;
222 }
223 return 0;
224 };
225
226 A.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,n,nsteps) );
227
228#else
229 //serial execution
230 for (size_t ii = 0; ii < n; ++ii) {
231 int idx = V[ii];
232 if (idx >= 0) a[ii] = b[idx];
233 else a[ii] = 0;
234 }
235
236#endif
237 // TMVA_DNN_PrintTCpuMatrix(A,"FromFastIm2Col");
238 // TMVA_DNN_PrintTCpuMatrix(B,"input to Im2Col");
239 // std::cout << "V vector " << V.size() << std::endl;
240 // for ( int i = 0; i < n; ++i) {
241 // std::cout << V[i] << " ";
242 // }
243 // std::cout << std::endl;
244}
245//____________________________________________________________________________
246template <typename AFloat>
248 size_t filterHeight, size_t filterWidth, size_t numFilters)
249{
250 size_t jump = filterHeight * filterWidth;
251 for (size_t j = 0; j < filterDepth; j++) {
252 for (size_t k = 0; k < numFilters; k++) {
253 for (size_t i = 0; i < jump; i++) {
254 A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
255 //A(j, k * jump + i) = B(k, j * jump + i);
256 }
257 }
258 }
259}
260
261//____________________________________________________________________________
262template <typename AFloat>
264{
265 int m = (int)output.GetNrows();
266 int n = (int)output.GetNcols();
267
268 int inc = 1.0;
269 AFloat alpha = 1.0;
270
271 AFloat *A = output.GetRawDataPointer();
272 const AFloat *x = biases.GetRawDataPointer();
273 const AFloat *y = TCpuMatrix<AFloat>::GetOnePointer();
274
275 R__ASSERT(m <= (int)biases.GetNoElements() );
277
278 ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, x, &inc, y, &inc, A, &m);
279}
280
281template<typename AFloat>
282size_t TCpu<AFloat>::calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
283{
284 size_t temp = imgDim - fltDim + 2 * padding;
285 if (temp % stride || temp + stride <= 0) {
286 Fatal("calculateDimension", "Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride) "
287 "%zu, %zu, %zu, %zu", imgDim, fltDim, padding, stride);
288 }
289 return temp / stride + 1;
290}
291
292//____________________________________________________________________________
// Forward pass of a convolutional layer (body of TCpu<AFloat>::ConvLayerForward).
//
// Steps visible in this block:
//   1. derive the output height/width from params via calculateDimension;
//   2. build one gather table (forwardIndices) from the dimensions of input[0];
//   3. define the per-sample work item f, which for sample i unrolls input[i]
//      with Im2colFast, computes output[i] = weights * inputTr^T, adds the
//      biases, stores the activation derivative in derivatives[i] and finally
//      applies the activation to output[i] in place.
//
// NOTE(review): this listing is incomplete. The first line of the signature
// (which declares `output`), one statement after the "multi-thread" comment and
// the statement between the end of the lambda and the closing brace are not
// present here. As shown, f is defined but never invoked -- restore the
// dispatch from the original file before relying on this block.
293template <typename AFloat>
295 std::vector<TCpuMatrix<AFloat>> & derivatives,
296 const std::vector<TCpuMatrix<AFloat>> &input,
297 const TCpuMatrix<AFloat> &weights, const TCpuMatrix<AFloat> & biases,
298 const DNN::CNN::TConvParams & params, EActivationFunction activFunc,
299 std::vector<TCpuMatrix<AFloat>> & /* */)
300{
// Output spatial size and the shape of the unrolled input (views x pixels per view).
301 size_t height = calculateDimension(params.inputHeight, params.filterHeight, params.paddingHeight, params.strideRows);
302 size_t width = calculateDimension(params.inputWidth, params.filterWidth, params.paddingWidth, params.strideCols);
303 size_t nLocalViews = height * width;
304 size_t nLocalViewPixels = params.inputDepth * params.filterHeight * params.filterWidth;
305
// input[0] is only used for its dimensions; the same table is reused for every sample.
306 R__ASSERT( input.size() > 0);
307 std::vector<int> forwardIndices(nLocalViews * nLocalViewPixels);
308 Im2colIndices(forwardIndices, input[0], nLocalViews, params.inputHeight, params.inputWidth, params.filterHeight,
309 params.filterWidth, params.strideRows, params.strideCols, params.paddingHeight, params.paddingWidth);
310
311 //this should fix multi-thread inizializations of arrays
313 TCpuMatrix<AFloat>::InitializeOneVector(output[0].GetNcols()); // since it is used in AddCOnvBiases
314
315
// Work item for sample i; captures everything by reference.
316 auto f = [&] (UInt_t i)
317 {
318 // dropout not yet implemented for CNN
319 // if (applyDropout && (dropoutProbability != 1.0)) {
320 // Dropout(input[i], dropoutProbability);
321 // }
322
323 TCpuMatrix<AFloat> inputTr(nLocalViews, nLocalViewPixels);
324 //inputTr.Zero(); // this is not thread safe
325
326 Im2colFast(inputTr, input[i], forwardIndices);
327
328 MultiplyTranspose(output[i], weights, inputTr);
329 AddConvBiases(output[i], biases);
330
// The derivative must be taken before evaluate() overwrites output[i].
331 evaluateDerivative<TCpu<AFloat>>(derivatives[i], activFunc, output[i]);
332 evaluate<TCpu<AFloat>>(output[i], activFunc);
333
334 };
335
337}
338
339//____________________________________________________________________________
340template <typename AFloat>
341void TCpu<AFloat>::ConvLayerBackward(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
342 TCpuMatrix<AFloat> &weightGradients, TCpuMatrix<AFloat> &biasGradients,
343 std::vector<TCpuMatrix<AFloat>> &df,
344 const std::vector<TCpuMatrix<AFloat>> &activationGradients,
345 const TCpuMatrix<AFloat> &weights,
346 const std::vector<TCpuMatrix<AFloat>> &activationsBackward, size_t batchSize,
347 size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width,
348 size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
349{
350 // Update derivatives
351 // size_t m, n;
352 // m = activationGradients[0].GetNrows();
353 // n = activationGradients[0].GetNcols();
354
355 for (size_t i = 0; i < batchSize; i++) {
356 // Compute element-wise product.
357 Hadamard(df[i], activationGradients[i]);
358 }
359
360 // Calculate the activation gradients of the previous layer
361 CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth, depth,
362 height, width, filterDepth, filterHeight, filterWidth);
363
364 // Calculate the weight gradients
365 CalculateConvWeightGradients(weightGradients, df, activationsBackward, batchSize, inputHeight, inputWidth, depth,
366 height, width, filterDepth, filterHeight, filterWidth, nLocalViews);
367
368 // Calculate the bias gradients
369 CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);
370}
371
372//____________________________________________________________________________
// Gradients with respect to the activations of the layer feeding a convolution.
//
// Steps visible in this block:
//   1. return immediately when activationGradientsBackward is empty, otherwise
//      zero every matrix in it;
//   2. build rotWeights from weights via RotateWeights;
//   3. derive zero paddings and a stride-1 gather table (vIndices) from the
//      dimensions of df[0];
//   4. define the per-sample work item f, which unrolls df[i] with Im2colFast
//      and writes rotWeights * dfTr^T into activationGradientsBackward[i].
//
// NOTE(review): the statement between the end of the lambda and the closing
// brace is not present in this listing. As shown, f is defined but never
// invoked -- restore the dispatch from the original file.
373template <typename AFloat>
374void TCpu<AFloat>::CalculateConvActivationGradients(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
375 const std::vector<TCpuMatrix<AFloat>> &df,
376 const TCpuMatrix<AFloat> &weights, size_t batchSize,
377 size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
378 size_t width, size_t filterDepth, size_t filterHeight,
379 size_t filterWidth)
380{
// Nothing to compute when the caller passes no output matrices.
381 if (activationGradientsBackward.size() == 0) return;
382
383 for (size_t i = 0; i < activationGradientsBackward.size(); i++) {
384 activationGradientsBackward[i].Zero();
385 }
386
387 // Transform the weights
388
389 //TMVA_DNN_PrintTCpuMatrix(weights,"weights");
390 // filter depth must be same as input depth
391 TCpuMatrix<AFloat> rotWeights(filterDepth, depth * filterHeight * filterWidth);
392 RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());
393 //TMVA_DNN_PrintTCpuMatrix(rotWeights,"rot-weights");
394
395 // Calculate the zero paddings
// NOTE(review): all operands are size_t, so the division truncates before
// floor() is applied, and the subtraction wraps if height > inputHeight + filterHeight - 1.
396 size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
397 size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));
398
399 // size_t tempZeroPaddingHeight = 1;
400 // size_t tempZeroPaddingWidth = 1;
401
402 // Calculate the number of local views and the number of pixles in each view
403 size_t tempNLocalViews = inputHeight * inputWidth;
404 size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
405
406 size_t tempStrideRows = 1;
407 size_t tempStrideCols = 1;
408
409 // An entire convolution follows
410
// df[0] is only used for its dimensions; the same table is reused for every sample.
411 std::vector<int> vIndices( tempNLocalViews * tempNLocalViewPixels );
412 Im2colIndices(vIndices, df[0], tempNLocalViews, height, width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
413 tempZeroPaddingHeight, tempZeroPaddingWidth);
414
415
416 //for (size_t i = 0; i < batchSize; i++) {
417 R__ASSERT(batchSize == df.size() );
418 R__ASSERT(batchSize == activationGradientsBackward.size() );
// Work item for sample i; captures everything by reference.
419 auto f = [&] (UInt_t i)
420 {
421
422 // Im2col(dfTr, df[i], height, width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
423 // tempZeroPaddingHeight, tempZeroPaddingWidth);
424
425 TCpuMatrix<AFloat> dfTr(tempNLocalViews, tempNLocalViewPixels);
426
427 Im2colFast(dfTr, df[i], vIndices);
428
429 //TMVA_DNN_PrintTCpuMatrix(df[i],"df[i]");
430 //TMVA_DNN_PrintTCpuMatrix(dfTr,"dfTr");
431
432 MultiplyTranspose(activationGradientsBackward[i], rotWeights, dfTr);
433
434 //TMVA_DNN_PrintTCpuMatrix(activationGradientsBackward[i],"activGrad-result");
435
436 };
437
439}
440
441//____________________________________________________________________________
// Weight gradients of a convolutional layer (body of
// TCpu<AFloat>::CalculateConvWeightGradients).
//
// Steps visible in this block:
//   1. zero weightGradients and check that its column count equals
//      filterDepth * filterHeight * filterWidth;
//   2. derive zero paddings and a stride-1 gather table (vIndices) from the
//      dimensions of activationsBackward[0];
//   3. allocate one depth x nLocalViewPixels scratch matrix per sample (vres);
//   4. define the per-sample work item fmap, which unrolls
//      activationsBackward[i] with Im2colFast and stores df[i] * xTr in vres[i];
//   5. add every vres[i] element-wise into weightGradients.
//
// NOTE(review): this listing is incomplete. The first line of the signature
// (which declares `weightGradients`) and the statement between the end of the
// lambda and the reduction loop are not present here. As shown, fmap is defined
// but never invoked, so step 5 would sum the scratch matrices as constructed --
// restore the dispatch from the original file.
442template <typename AFloat>
444 const std::vector<TCpuMatrix<AFloat>> &df,
445 const std::vector<TCpuMatrix<AFloat>> &activationsBackward,
446 size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
447 size_t height, size_t width, size_t filterDepth, size_t filterHeight,
448 size_t filterWidth, size_t nLocalViews)
449{
450 // reinitialize the weight gradients to 0
451 weightGradients.Zero();
452
453 const size_t filterSize = filterHeight * filterWidth;
454 const size_t nLocalViewPixels = filterDepth * filterHeight * filterWidth;
455 R__ASSERT( weightGradients.GetNcols() == filterDepth * filterHeight * filterWidth);
456
457 const size_t tempStrideRows = 1;
458 const size_t tempStrideCols = 1;
459
460 // Calculate the zero paddings from the input height and width (assume stride =1 )
// NOTE(review): unsigned arithmetic -- the subtraction wraps if
// inputHeight > height + filterHeight - 1 (same for the width).
461 const size_t tempZeroPaddingHeight = (height - inputHeight + filterHeight - 1) / 2;
462 const size_t tempZeroPaddingWidth = (width - inputWidth + filterWidth - 1) / 2;
463
464
465 // convolution
466
467
468
// activationsBackward[0] is only used for its dimensions; the same table is
// reused for every sample.
469 std::vector<int> vIndices(nLocalViews * nLocalViewPixels );
470 Im2colIndices(vIndices, activationsBackward[0], nLocalViews, inputHeight, inputWidth, filterHeight , filterWidth,
471 tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
472
473 //std::cout << "do back-propagation in conv layer - compute weight gradient" << std::endl;
474
// One scratch result per sample, so each work item writes only to its own matrix.
475 std::vector< TCpuMatrix<AFloat> > vres;//(batchSize);
476 for (size_t i = 0; i < batchSize; i++) {
477 vres.emplace_back(depth, nLocalViewPixels);
478 //TMVA_DNN_PrintTCpuMatrix(df[i],"df");
479 //TMVA_DNN_PrintTCpuMatrix(activationsBackward[i],"df");
480
481 }
482
// Work item for sample i; captures everything by reference.
483 auto fmap = [&](int i) {
484
485 //TMVA_DNN_PrintTCpuMatrix(df[i],"df-i");
486 TCpuMatrix<AFloat> xTr(nLocalViews, nLocalViewPixels);
// NOTE(review): res is constructed but not used by any visible statement.
487 TCpuMatrix<AFloat> res(depth, nLocalViewPixels);
488
489 //computing t he gradient is equivalent of doing a convolution of the input using as conv kernel the delta's (the df[] values)
490 //N.B. only stride values=1 are now supported
491
492 //xTr.Zero();
493 // Im2col(xTr, const_cast<TCpuMatrix<AFloat> &>(activationsBackward[i]), inputHeight, inputWidth, filterHeight , filterWidth,
494 // tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
495 Im2colFast(xTr, activationsBackward[i], vIndices);
496
497 //std::cout << "doing im2colfast" << std::endl;
498 //TMVA_DNN_PrintTCpuMatrix(xTr,"xTr-i");
499 //TMVA_DNN_PrintTCpuMatrix(activationsBackward[i],"actbackward-i");
500 Multiply(vres[i], df[i], xTr);
501 //TMVA_DNN_PrintTCpuMatrix(vres[i],"res_ofMT");
502
503 return;
504 //return res;
505 };
506
508
509// auto freduce = [&](const std::vector<TCpuMatrix<AFloat>> & vres) {
// Serial reduction: accumulate the per-sample results into weightGradients.
510 R__ASSERT(vres.size() == batchSize);
511 for (size_t i = 0; i < batchSize; i++) {
512 //TMVA_DNN_PrintTCpuMatrix(vres[i],"res");
513 for (size_t j = 0; j < depth; j++) {
514 for (size_t k = 0; k < filterDepth; k++) {
515 size_t kOffset = k * filterSize;
516 for (size_t l = 0; l < filterSize; l++) {
517 //weightGradients(j, k * (filterHeight * filterWidth) + l) += res(k, (tempNLocalViews - 1) - l);
518 weightGradients(j, kOffset + l) += vres[i](j, kOffset + l);
519 }
520 }
521 }
522 // TMVA_DNN_PrintTCpuMatrix(weightGradients,"weights_i");
523 }
524 // };
525
526 //TCpuMatrix<AFloat>::GetThreadExecutor().MapReduce(fmap, ROOT::TSeqI( batchSize ) , freduce);
527 //TMVA_DNN_PrintTCpuMatrix(weightGradients,"W-Grad");
528}
529
530//____________________________________________________________________________
531template <typename AFloat>
533 size_t batchSize, size_t depth, size_t nLocalViews)
534{
535 biasGradients.Zero();
536 for (size_t i = 0; i < depth; i++) {
537 AFloat sum = 0;
538 for (size_t j = 0; j < nLocalViews; j++) {
539 for (size_t k = 0; k < batchSize; k++) {
540 sum += df[k](i, j);
541 }
542 }
543 biasGradients(i, 0) = sum;
544 }
545}
546
547//____________________________________________________________________________
548template <typename AFloat>
550 size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,
551 size_t strideCols)
552{
553 // image boudaries
554 int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
555 int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
556 size_t currLocalView = 0;
557
558 // centers
559 for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
560 for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
561 // within local views
562 for (int m = 0; m < (Int_t)C.GetNrows(); m++) {
563 AFloat value = -std::numeric_limits<AFloat>::max();
564
565 for (int k = i - fltHeight / 2; k <= Int_t(i + (fltHeight - 1) / 2); k++) {
566 for (int l = j - fltWidth / 2; l <= Int_t(j + (fltWidth - 1) / 2); l++) {
567 if (C(m, k * imgWidth + l) > value) {
568 value = C(m, k * imgWidth + l);
569 B(m, currLocalView) = k * imgWidth + l;
570 }
571 }
572 }
573 A(m, currLocalView) = value;
574 }
575 currLocalView++;
576 }
577 }
578}
579
580//____________________________________________________________________________
581template <typename AFloat>
583 const TCpuMatrix<AFloat> &activationGradients,
584 const TCpuMatrix<AFloat> &indexMatrix,
585 size_t /* imgHeight */,
586 size_t /* imgWidth */,
587 size_t /* fltHeight */,
588 size_t /* fltWidth */,
589 size_t /* strideRows */,
590 size_t /* strideCols */,
591 size_t nLocalViews)
592{
593 size_t depth = activationGradientsBackward.GetNrows();
594
595 for (size_t j = 0; j < depth; j++) {
596 // initialize to zeros
597 for (size_t t = 0; t < (size_t)activationGradientsBackward.GetNcols(); t++) {
598 activationGradientsBackward(j, t) = 0;
599 }
600
601 // set values
602 for (size_t k = 0; k < nLocalViews; k++) {
603 AFloat grad = activationGradients(j, k);
604 size_t winningIdx = indexMatrix(j, k);
605 activationGradientsBackward(j, winningIdx) += grad;
606 }
607 }
608}
609
610//____________________________________________________________________________
611template <typename AFloat>
613{
614 size_t nColsA = A.GetNcols();
615 size_t nColsB = B.GetNcols();
616
617 for (size_t i = 0; i < A.GetNrows(); i++) {
618 for (size_t j = 0; j < A.GetNcols(); j++) {
619 size_t nElem = i * nColsA + j;
620 A(i, j) = B(nElem / nColsB, nElem % nColsB);
621 }
622 }
623}
624
625//____________________________________________________________________________
626template <typename AFloat>
627void TCpu<AFloat>::Flatten(TCpuMatrix<AFloat> &A, const std::vector<TCpuMatrix<AFloat>> &B, size_t size, size_t nRows,
628 size_t nCols)
629{
630 for (size_t i = 0; i < (size_t)size; i++) {
631 for (size_t j = 0; j < (size_t)nRows; j++) {
632 for (size_t k = 0; k < (size_t)nCols; k++) {
633 A(i, j * nCols + k) = B[i](j, k);
634 }
635 }
636 }
637}
638
639//____________________________________________________________________________
640template <typename AFloat>
641void TCpu<AFloat>::Deflatten(std::vector<TCpuMatrix<AFloat>> &A, const TCpuMatrix<AFloat> &B, size_t size, size_t nRows,
642 size_t nCols)
643{
644 for (size_t i = 0; i < (size_t)size; i++) {
645 for (size_t j = 0; j < (size_t)nRows; j++) {
646 for (size_t k = 0; k < (size_t)nCols; k++) {
647 A[i](j, k) = B(i, j * nCols + k);
648 }
649 }
650 }
651}
652
653//______________________________________________________________________________
654template <typename AReal>
655void TCpu<AReal>::Rearrange(std::vector<TCpuMatrix<AReal>> &out, const std::vector<TCpuMatrix<AReal>> &in)
656{
657 // B x T x D out --- T x B x D in*/
658 size_t B = out.size();
659 size_t T = out[0].GetNrows();
660 size_t D = out[0].GetNcols();
661 if ((T != in.size()) || (B != in[0].GetNrows()) || (D != in[0].GetNcols())) {
662 std::cout << "Incompatible Dimensions\n"
663 << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"
664 << D << "\n";
665 return;
666 }
667 for (size_t i = 0; i < B; ++i) {
668 for (size_t j = 0; j < T; ++j) {
669 for (size_t k = 0; k < D; ++k) {
670 out[i](j, k) = in[j](i, k);
671 }
672 }
673 }
674 return;
675}
676
677} // namespace DNN
678} // namespace TMVA
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:96
void Error(const char *location, const char *msgfmt,...)
void Fatal(const char *location, const char *msgfmt,...)
double floor(double)
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
The TCpuMatrix class.
Definition: CpuMatrix.h:89
size_t GetNcols() const
Definition: CpuMatrix.h:143
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:265
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:152
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
Definition: CpuMatrix.h:102
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:180
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:155
size_t GetNrows() const
Definition: CpuMatrix.h:142
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:87
size_t GetNoElements() const
Definition: CpuMatrix.h:144
static void CalculateConvActivationGradients(std::vector< TCpuMatrix< Scalar_t > > &activationGradientsBackward, const std::vector< TCpuMatrix< Scalar_t > > &df, const TCpuMatrix< Scalar_t > &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void Im2col(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
Definition: Propagation.cxx:99
static void AddRowWise(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.cxx:60
static void Im2colFast(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, const std::vector< int > &V)
static void Downsample(TCpuMatrix< AReal > &A, TCpuMatrix< AReal > &B, const TCpuMatrix< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void CalculateConvWeightGradients(TCpuMatrix< Scalar_t > &weightGradients, const std::vector< TCpuMatrix< Scalar_t > > &df, const std::vector< TCpuMatrix< Scalar_t > > &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static void MaxPoolLayerBackward(TCpuMatrix< AReal > &activationGradientsBackward, const TCpuMatrix< AReal > &activationGradients, const TCpuMatrix< AReal > &indexMatrix, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void Flatten(TCpuMatrix< AReal > &A, const std::vector< TCpuMatrix< AReal > > &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A.
static void Reshape(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
static void ConvLayerForward(std::vector< TCpuMatrix< Scalar_t > > &output, std::vector< TCpuMatrix< Scalar_t > > &derivatives, const std::vector< TCpuMatrix< Scalar_t > > &input, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, std::vector< TCpuMatrix< Scalar_t > > &)
Forward propagation in the Convolutional layer.
static void Rearrange(std::vector< TCpuMatrix< AReal > > &out, const std::vector< TCpuMatrix< AReal > > &in)
Rearrange data according to time: fill the B x T x D tensor out from the T x B x D tensor in.
static void RotateWeights(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
static void MultiplyTranspose(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
Definition: Propagation.cxx:25
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void CalculateConvBiasGradients(TCpuMatrix< Scalar_t > &biasGradients, const std::vector< TCpuMatrix< Scalar_t > > &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void ConvLayerBackward(std::vector< TCpuMatrix< Scalar_t > > &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, std::vector< TCpuMatrix< Scalar_t > > &df, const std::vector< TCpuMatrix< Scalar_t > > &activationGradients, const TCpuMatrix< Scalar_t > &weights, const std::vector< TCpuMatrix< Scalar_t > > &activationBackward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void Backward(TCpuMatrix< Scalar_t > &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, TCpuMatrix< Scalar_t > &df, const TCpuMatrix< Scalar_t > &activationGradients, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
Definition: Propagation.cxx:79
static void AddConvBiases(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the biases in the Convolutional Layer.
static void Im2colIndices(std::vector< int > &V, const TCpuMatrix< AReal > &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Deflatten(std::vector< TCpuMatrix< AReal > > &A, const TCpuMatrix< AReal > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor A.
double beta(double x, double y)
Calculates the beta function.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
static double B[]
static double A[]
static double C[]
double T(double x)
Definition: ChebyshevPol.h:34
void Ger(const int *m, const int *n, const Real_t *alpha, const Real_t *x, const int *incx, const Real_t *y, const int *incy, Real_t *A, const int *lda)
Add the outer product of x and y to the matrix A.
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
Abstract ClassifierFactory template that handles arbitrary types.
size_t strideRows
The number of row pixels to slide the filter each step.
Definition: ConvLayer.h:168
size_t filterHeight
The height of the filter.
Definition: ConvLayer.h:165
size_t inputHeight
The height of the previous layer or input.
Definition: ConvLayer.h:161
size_t paddingWidth
The number of zero layers left and right of the input.
Definition: ConvLayer.h:171
size_t filterWidth
The width of the filter.
Definition: ConvLayer.h:166
size_t paddingHeight
The number of zero layers added top and bottom of the input.
Definition: ConvLayer.h:170
size_t inputWidth
The width of the previous layer or input.
Definition: ConvLayer.h:162
size_t inputDepth
The depth of the previous layer or input.
Definition: ConvLayer.h:160
size_t strideCols
The number of column pixels to slide the filter each step.
Definition: ConvLayer.h:169
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
auto * a
Definition: textangle.C:12
static long int sum(long int i)
Definition: Factory.cxx:2258