Logo ROOT  
Reference Guide
Propagation.hxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 10/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12//////////////////////////////////////////////////////////////////////
13// Implementation of the functions required for the forward and //
14// backward propagation of activations through a neural network for //
15// the reference implementation. //
16//////////////////////////////////////////////////////////////////////
17
19
20
21#ifdef R__HAS_TMVACPU
23#else
25#endif
26
27namespace TMVA {
28namespace DNN {
29
30
31
32template <typename AFloat>
33void TCpu<AFloat>::MultiplyTranspose(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &input,
34 const TCpuMatrix<AFloat> &Weights)
35{
36
37 int m = (int)input.GetNrows();
38 int k = (int)input.GetNcols();
39 int n = (int)Weights.GetNrows();
40
41 if ((int)output.GetNrows() != m) {
42 Error("MultiplyTranspose","Invalid input - output rows - input: %d != output : %d",m, (int) output.GetNrows());
43 R__ASSERT((int) output.GetNrows() == m);
44 }
45 if ((int)output.GetNcols() != n) {
46 Error("MultiplyTranspose","Invalid output cols or weight rows - output cols: %d != weight rows : %d",(int) output.GetNcols(),n);
47 R__ASSERT((int) output.GetNcols() == n);
48 }
49 if ((int)Weights.GetNcols() != k) {
50 Error("MultiplyTranspose","Invalid input cols or weight cols - input cols: %d != weight cols : %d", k, (int) Weights.GetNcols());
51 R__ASSERT((int) Weights.GetNcols() == k);
52 }
53
54#ifdef R__HAS_TMVACPU
55
56 char transa = 'N';
57 char transb = 'T';
58
59 AFloat alpha = 1.0;
60 AFloat beta = 0.0;
61
62 const AFloat *A = input.GetRawDataPointer();
63 const AFloat *B = Weights.GetRawDataPointer();
64 AFloat *C = output.GetRawDataPointer();
65
66 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha, A, &m, B, &n, &beta, C, &m);
67#else
68 TMatrixT<AFloat> tmp(output.GetNrows(), output.GetNcols());
69 tmp.MultT(input, Weights);
70 output = tmp;
71#endif
72}
73
74template <typename AFloat>
75void TCpu<AFloat>::AddRowWise(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &biases)
76{
77#ifdef R__HAS_TMVACPU
78 int m = (int)output.GetNrows();
79 int n = (int)output.GetNcols();
80
81 int inc = 1.0;
82 AFloat alpha = 1.0;
83
84 AFloat *A = output.GetRawDataPointer();
85 const AFloat *x = TCpuMatrix<AFloat>::GetOnePointer();
86 const AFloat *y = biases.GetRawDataPointer();
87
89 R__ASSERT(n <= (int)(biases.GetNcols()*biases.GetNrows()));
90
91 ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, x, &inc, y, &inc, A, &m);
92#else
95 output = tmp;
96#endif
97}
98
99template <typename AFloat>
100void TCpu<AFloat>::Backward(TCpuTensor<AFloat> &activationGradientsBackward, TCpuMatrix<AFloat> &weightGradients,
101 TCpuMatrix<AFloat> &biasGradients, const TCpuTensor<AFloat> &df,
102 const TCpuTensor<AFloat> &/*activationGradients*/, const TCpuMatrix<AFloat> &weights,
103 const TCpuTensor<AFloat> &activationsBackward)
104{
105 // Compute element-wise product.
106 //Hadamard(df, activationGradients);
107
108 Matrix_t df_m = df.GetMatrix();
109
110 // Activation gradients (exclude if it is first layer)
111 if (activationGradientsBackward.GetSize() > 0 ) {
112
113 Matrix_t activationGradientsBackward_m = activationGradientsBackward.GetMatrix();
114
115 Multiply(activationGradientsBackward_m, df_m, weights);
116 }
117
118 // Weight gradients.
119 if (weightGradients.GetNoElements() > 0) TransposeMultiply(weightGradients, df_m, activationsBackward.GetMatrix());
120
121 // PrintTensor(activationsBackward,"activ backward");
122 //PrintTensor(Tensor_t(weightGradients),"weight gradients");
123
124 // Bias gradients.
125 if (biasGradients.GetNoElements() > 0) SumColumns(biasGradients, df_m);
126}
127
128
129
130//____________________________________________________________________________
131template <typename AFloat>
132void TCpu<AFloat>::Im2col(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, size_t imgHeight, size_t imgWidth,
133 size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
134 size_t zeroPaddingHeight, size_t zeroPaddingWidth)
135{
136
137 // image boudaries
138 int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
139 int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
140 size_t currLocalView = 0;
141
142 const int halfFltHeight = fltHeight / 2;
143 const int halfFltWidth = fltWidth / 2;
144 const int halfFltHeightM1 = (fltHeight - 1) / 2;
145 const int halfFltWidthM1 = (fltWidth - 1) / 2;
146 const int nRowsInput = B.GetNrows();
147 const int nColsInput = B.GetNcols();
148 const int nRowsOutput = A.GetNrows();
149 const int nColsOutput = A.GetNcols();
150
151 // convolution centers
152 for (int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
153 for (int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {
154 size_t currLocalViewPixel = 0;
155
156 // within the local view
157 R__ASSERT((int) currLocalView < nRowsOutput );
158
159 for (int m = 0; m < nRowsInput; m++) {
160 for (int k = i - halfFltHeight ; k <= Int_t(i + halfFltHeightM1 ); k++) {
161 int kstep = k * imgWidth;
162 for (int l = j - halfFltWidth ; l <= Int_t(j + halfFltWidthM1); l++) {
163
164 // Check the boundaries
165 R__ASSERT((int) currLocalViewPixel < nColsOutput );
166 //R__ASSERT(k * imgWidth + l < B.GetNcols());
167 if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
168 A(currLocalView, currLocalViewPixel++) = 0;
169 else
170 A(currLocalView, currLocalViewPixel++) = B(m, kstep + l);
171 }
172 }
173 }
174 //std::cout << " i " << i << " " << j << " increment currLocalView " << currLocalView << std::endl;
175 currLocalView++;
176 }
177 }
178 //TMVA_DNN_PrintTCpuMatrix(A,"FromIm2Col");
179}
180
181//____________________________________________________________________________
182template <typename AFloat>
183void TCpu<AFloat>::Im2colIndices(std::vector<int> &V, const TCpuMatrix<AFloat> &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth,
184 size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
185 size_t zeroPaddingHeight, size_t zeroPaddingWidth)
186{
187
188 // image boudaries
189 int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
190 int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
191 size_t currLocalView = 0;
192
193 const int halfFltHeight = fltHeight / 2;
194 const int halfFltWidth = fltWidth / 2;
195 const int halfFltHeightM1 = (fltHeight - 1) / 2;
196 const int halfFltWidthM1 = (fltWidth - 1) / 2;
197 const int nRowsInput = B.GetNrows();
198 const int nColsInput = B.GetNcols();
199 const size_t nSizeOutput = V.size();
200 const int npixels = nRowsInput * fltHeight * fltWidth;
201 // const int nRowsOutput = A.GetNrows();
202 // const int nColsOutput = A.GetNcols();
203
204 // convolution centers
205 for (int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
206 for (int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {
207 size_t currLocalViewPixel = 0;
208
209 // within the local view
210 //R__ASSERT((int) currLocalView < nRowsOutput );
211
212 for (int m = 0; m < nRowsInput; m++) {
213 for (int k = i - halfFltHeight ; k <= Int_t(i + halfFltHeightM1 ); k++) {
214 int kstep = k * imgWidth;
215 for (int l = j - halfFltWidth ; l <= Int_t(j + halfFltWidthM1); l++) {
216
217 // Check the boundaries
218 //R__ASSERT(currLocalViewPixel < nColsOutput );
219 R__ASSERT(currLocalView * npixels + currLocalViewPixel < nSizeOutput );
220 if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
221 //V[currLocalView * npixels + currLocalViewPixel]=-1;
222 V[currLocalViewPixel * nLocalViews + currLocalView] = -1;
223 else
224 V[currLocalViewPixel * nLocalViews + currLocalView]= ( kstep + l) * nRowsInput + m;
225
226 currLocalViewPixel++;
227 }
228 }
229 }
230 currLocalView++;
231 }
232 }
233}
234template <typename AFloat>
235void TCpu<AFloat>::Im2colFast(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, const std::vector<int> &V)
236{
237 size_t n = V.size();
238 R__ASSERT( n == A.GetNcols() * A.GetNrows() );
239 AFloat * a = A.GetRawDataPointer();
240 const AFloat * b = B.GetRawDataPointer();
241
242//#define DL_USE_MTE
243 // parallel execution
244#ifdef DL_USE_MTE
245 const size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(n);
246
247 auto f = [&](UInt_t workerID)
248 {
249 for (size_t j = 0; j < nsteps; ++j) {
250 size_t ii = workerID+j;
251 if (ii >= n) break;
252 int idx = V[ii];
253 if (idx >= 0) a[ii] = b[idx];
254 else a[ii] = 0;
255 }
256 return 0;
257 };
258
259 A.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,n,nsteps) );
260
261#else
262 //serial execution
263 for (size_t ii = 0; ii < n; ++ii) {
264 int idx = V[ii];
265 if (idx >= 0) a[ii] = b[idx];
266 else a[ii] = 0;
267 }
268
269#endif
270}
271//____________________________________________________________________________
272template <typename AFloat>
274 size_t filterHeight, size_t filterWidth, size_t numFilters)
275{
276 size_t jump = filterHeight * filterWidth;
277 for (size_t j = 0; j < filterDepth; j++) {
278 for (size_t k = 0; k < numFilters; k++) {
279 for (size_t i = 0; i < jump; i++) {
280 A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
281 //A(j, k * jump + i) = B(k, j * jump + i);
282 }
283 }
284 }
285}
286
287//____________________________________________________________________________
288template <typename AFloat>
290{
291#ifdef R__HAS_TMVACPU
292 int m = (int)output.GetNrows();
293 int n = (int)output.GetNcols();
294
295 int inc = 1.0;
296 AFloat alpha = 1.0;
297
298 AFloat *A = output.GetRawDataPointer();
299 const AFloat *x = biases.GetRawDataPointer();
300 const AFloat *y = TCpuMatrix<AFloat>::GetOnePointer();
301
302 R__ASSERT(m <= (int)biases.GetNoElements() );
304
305 ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, x, &inc, y, &inc, A, &m);
306#else
309 output = tmp;
310#endif
311}
312
313template<typename AFloat>
314size_t TCpu<AFloat>::calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
315{
316 size_t temp = imgDim - fltDim + 2 * padding;
317 if (temp % stride || temp + stride <= 0) {
318 Fatal("calculateDimension", "Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride) "
319 "%zu, %zu, %zu, %zu", imgDim, fltDim, padding, stride);
320 }
321 return temp / stride + 1;
322}
323
324//____________________________________________________________________________
325template <typename AFloat>
327 TCpuTensor<AFloat> & inputActivationFunc,
328 const TCpuTensor<AFloat> &input,
329 const TCpuMatrix<AFloat> &weights, const TCpuMatrix<AFloat> & biases,
330 const DNN::CNN::TConvParams & params, EActivationFunction activFunc,
331 TCpuTensor<AFloat> & /* */,
332 const ConvDescriptors_t & /*descriptors*/,
333 ConvWorkspace_t & /*workspace*/)
334{
335 size_t height = calculateDimension(params.inputHeight, params.filterHeight, params.paddingHeight, params.strideRows);
336 size_t width = calculateDimension(params.inputWidth, params.filterWidth, params.paddingWidth, params.strideCols);
337 size_t nLocalViews = height * width;
338 size_t nLocalViewPixels = params.inputDepth * params.filterHeight * params.filterWidth;
339
340 R__ASSERT( input.GetSize() > 0);
341 std::vector<int> forwardIndices(nLocalViews * nLocalViewPixels);
342 Im2colIndices(forwardIndices, input.At(0).GetMatrix(), nLocalViews, params.inputHeight, params.inputWidth, params.filterHeight,
343 params.filterWidth, params.strideRows, params.strideCols, params.paddingHeight, params.paddingWidth);
344
345 //this should fix multi-thread inizializations of arrays
347 TCpuMatrix<AFloat>::InitializeOneVector(output.GetWSize()); // since it is used in AddCOnvBiases
348
349
350 auto f = [&] (UInt_t i)
351 {
352 // dropout not yet implemented for CNN
353 // if (applyDropout && (dropoutProbability != 1.0)) {
354 // Dropout(input[i], dropoutProbability);
355 // }
356
357 TCpuMatrix<AFloat> inputTr(nLocalViews, nLocalViewPixels);
358 //inputTr.Zero(); // this is not thread safe
359
360 Im2colFast(inputTr, input.At(i).GetMatrix(), forwardIndices);
361
362 Matrix_t output_m = output.At(i).GetMatrix();
363 MultiplyTranspose(output_m, weights, inputTr);
364 AddConvBiases(output_m, biases);
365
366 };
367
369
370 //evaluateDerivative<TCpu<AFloat>>(derivatives, activFunc, output);
371 // need to save output of convolution (input to activation function)
372 Copy(inputActivationFunc, output);
373
374 //evaluate<TCpu<AFloat>>(output, activFunc);
375 ActivationFunctionForward(output, activFunc, ActivationDescriptor_t());
376}
377
378//____________________________________________________________________________
379template <typename AFloat>
380void TCpu<AFloat>::ConvLayerBackward(TCpuTensor<AFloat> &activationGradientsBackward,
381 TCpuMatrix<AFloat> &weightGradients, TCpuMatrix<AFloat> &biasGradients,
382 TCpuTensor<AFloat> &inputActivationFunc,
383 TCpuTensor<AFloat> &activationGradients,
384 const TCpuMatrix<AFloat> &weights,
385 const TCpuTensor<AFloat> &activationsBackward,
386 const Tensor_t & outputTensor,
387 EActivationFunction activFunc,
388 const ConvDescriptors_t & /*descriptors*/,
389 ConvWorkspace_t & /*workspace*/,
390 size_t batchSize, size_t inputHeight,
391 size_t inputWidth, size_t depth,
392 size_t height, size_t width,
393 size_t filterDepth, size_t filterHeight,
394 size_t filterWidth, size_t nLocalViews)
395{
396 // Update derivatives
397 // size_t m, n;
398 // m = activationGradients[0].GetNrows();
399 // n = activationGradients[0].GetNcols();
400
401
402 // Compute activation backward pass dx = f'(x) * dy
403 // put resulting dx of activation in activationgradients
404 Tensor_t df(activationGradients.GetShape() ); // this is a deep copy, could be put as data member of class
405 ActivationFunctionBackward(df, outputTensor, activationGradients, inputActivationFunc,
406 activFunc, ActivationDescriptor_t() );
407
408 // Hadamard(df, activationGradients);
409
410
411 // Calculate the activation gradients of the previous layer
412 CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth, depth,
413 height, width, filterDepth, filterHeight, filterWidth);
414
415 // Calculate the weight gradients
416 CalculateConvWeightGradients(weightGradients, df, activationsBackward, batchSize, inputHeight, inputWidth, depth,
417 height, width, filterDepth, filterHeight, filterWidth, nLocalViews);
418
419 // Calculate the bias gradients
420 CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);
421}
422
423//____________________________________________________________________________
424template <typename AFloat>
426 const TCpuTensor<AFloat> &df,
427 const TCpuMatrix<AFloat> &weights, size_t batchSize,
428 size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
429 size_t width, size_t filterDepth, size_t filterHeight,
430 size_t filterWidth)
431{
432 if (activationGradientsBackward.GetSize() == 0) return;
433
434
435 activationGradientsBackward.Zero();
436
437
438 // Transform the weights
439
440 //TMVA_DNN_PrintTCpuMatrix(weights,"weights");
441 // filter depth must be same as input depth
442 TCpuMatrix<AFloat> rotWeights(filterDepth, depth * filterHeight * filterWidth);
443 RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());
444 //TMVA_DNN_PrintTCpuMatrix(rotWeights,"rot-weights");
445
446 // Calculate the zero paddings
447 size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
448 size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));
449
450 // size_t tempZeroPaddingHeight = 1;
451 // size_t tempZeroPaddingWidth = 1;
452
453 // Calculate the number of local views and the number of pixles in each view
454 size_t tempNLocalViews = inputHeight * inputWidth;
455 size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
456
457 size_t tempStrideRows = 1;
458 size_t tempStrideCols = 1;
459
460 // An entire convolution follows
461
462 std::vector<int> vIndices( tempNLocalViews * tempNLocalViewPixels );
463 Im2colIndices(vIndices, df.At(0).GetMatrix(), tempNLocalViews, height, width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
464 tempZeroPaddingHeight, tempZeroPaddingWidth);
465
466
467 //for (size_t i = 0; i < batchSize; i++) {
468 R__ASSERT(batchSize == df.GetFirstSize() );
469 R__ASSERT(batchSize == activationGradientsBackward.GetFirstSize() );
470 auto f = [&] (UInt_t i)
471 {
472 // Im2col(dfTr, df[i], height, width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
473 // tempZeroPaddingHeight, tempZeroPaddingWidth);
474
475 TCpuMatrix<AFloat> dfTr(tempNLocalViews, tempNLocalViewPixels);
476
477 Im2colFast(dfTr, df.At(i).GetMatrix(), vIndices);
478
479 //TMVA_DNN_PrintTCpuMatrix(df[i],"df[i]");
480 //TMVA_DNN_PrintTCpuMatrix(dfTr,"dfTr");
481
482 Matrix_t agb_m = activationGradientsBackward.At(i).GetMatrix();
483 MultiplyTranspose(agb_m, rotWeights, dfTr);
484
485 //TMVA_DNN_PrintTCpuMatrix(activationGradientsBackward[i],"activGrad-result");
486
487 };
488
490}
491
492//____________________________________________________________________________
493template <typename AFloat>
495 const TCpuTensor<AFloat> &df,
496 const TCpuTensor<AFloat> &activationsBackward,
497 size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
498 size_t height, size_t width, size_t filterDepth, size_t filterHeight,
499 size_t filterWidth, size_t nLocalViews)
500{
501 // reinitialize the weight gradients to 0
502 weightGradients.Zero();
503
504 const size_t filterSize = filterHeight * filterWidth;
505 const size_t nLocalViewPixels = filterDepth * filterHeight * filterWidth;
506 R__ASSERT( weightGradients.GetNcols() == filterDepth * filterHeight * filterWidth);
507
508 const size_t tempStrideRows = 1;
509 const size_t tempStrideCols = 1;
510
511 // Calculate the zero paddings from the input height and width (assume stride =1 )
512 const size_t tempZeroPaddingHeight = (height - inputHeight + filterHeight - 1) / 2;
513 const size_t tempZeroPaddingWidth = (width - inputWidth + filterWidth - 1) / 2;
514
515
516 // convolution
517
518
519
520 std::vector<int> vIndices(nLocalViews * nLocalViewPixels );
521 Im2colIndices(vIndices, activationsBackward.At(0).GetMatrix(), nLocalViews, inputHeight, inputWidth, filterHeight , filterWidth,
522 tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
523
524 //std::cout << "do back-propagation in conv layer - compute weight gradient" << std::endl;
525
526 // std::vector< TCpuMatrix<AFloat> > vres;//(batchSize);
527 // for (size_t i = 0; i < batchSize; i++) {
528 // vres.emplace_back(depth, nLocalViewPixels);
529 // //TMVA_DNN_PrintTCpuMatrix(df[i],"df");
530 // //TMVA_DNN_PrintTCpuMatrix(activationsBackward[i],"df");
531
532 //}
533 //TCpuTensor<AFloat> vres( { batchSize, depth, nLocalViewPIxels} );
534 TCpuTensor<AFloat> vres( batchSize, depth, nLocalViewPixels);
535
536 auto fmap = [&](int i) {
537
538 //TMVA_DNN_PrintTCpuMatrix(df[i],"df-i");
539 TCpuMatrix<AFloat> xTr(nLocalViews, nLocalViewPixels);
540 TCpuMatrix<AFloat> res(depth, nLocalViewPixels);
541
542 //computing t he gradient is equivalent of doing a convolution of the input using as conv kernel the delta's (the df[] values)
543 //N.B. only stride values=1 are now supported
544
545 //xTr.Zero();
546 // Im2col(xTr, const_cast<TCpuMatrix<AFloat> &>(activationsBackward[i]), inputHeight, inputWidth, filterHeight , filterWidth,
547 // tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
548 Im2colFast(xTr, activationsBackward.At(i).GetMatrix(), vIndices);
549
550 //std::cout << "doing im2colfast" << std::endl;
551 //TMVA_DNN_PrintTCpuMatrix(xTr,"xTr-i");
552 //TMVA_DNN_PrintTCpuMatrix(activationsBackward[i],"actbackward-i");
553 Matrix_t mres = vres.At(i).GetMatrix();
554 Multiply( mres, df.At(i).GetMatrix(), xTr);
555 //TMVA_DNN_PrintTCpuMatrix(vres[i],"res_ofMT");
556
557 return;
558 //return res;
559 };
560
562
563// auto freduce = [&](const TCpuTensor<AFloat> & vres) {
564 R__ASSERT(vres.GetFirstSize() == batchSize);
565 for (size_t i = 0; i < batchSize; i++) {
566 //TMVA_DNN_PrintTCpuMatrix(vres[i],"res");
567 Matrix_t vres_m = vres.At(i).GetMatrix();
568 for (size_t j = 0; j < depth; j++) {
569 for (size_t k = 0; k < filterDepth; k++) {
570 size_t kOffset = k * filterSize;
571 for (size_t l = 0; l < filterSize; l++) {
572 //weightGradients(j, k * (filterHeight * filterWidth) + l) += res(k, (tempNLocalViews - 1) - l);
573 weightGradients(j, kOffset + l) += vres_m(j, kOffset + l);
574 }
575 }
576 }
577 // TMVA_DNN_PrintTCpuMatrix(weightGradients,"weights_i");
578 }
579 // };
580
581 //TCpuMatrix<AFloat>::GetThreadExecutor().MapReduce(fmap, ROOT::TSeqI( batchSize ) , freduce);
582 //TMVA_DNN_PrintTCpuMatrix(weightGradients,"W-Grad");
583}
584
585//____________________________________________________________________________
586template <typename AFloat>
588 size_t batchSize, size_t depth, size_t nLocalViews)
589{
590 biasGradients.Zero();
591 for (size_t i = 0; i < depth; i++) {
592 AFloat sum = 0;
593 for (size_t j = 0; j < nLocalViews; j++) {
594 for (size_t k = 0; k < batchSize; k++) {
595 sum += df(k,i,j);
596 //sum += df[k](i, j);
597 }
598 }
599 biasGradients(i, 0) = sum;
600 }
601}
602
603//____________________________________________________________________________
604template <typename AFloat>
606 const PoolingDescriptors_t & /*descriptors*/,
607 PoolingWorkspace_t & /*workspace*/,
608 size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,
609 size_t strideCols)
610{
611 // A is output , B is a cached index tensor used for backward pass and C is the input
612
613 assert( tA.GetFirstSize() == tC.GetFirstSize());
614 for (size_t ifirst = 0; ifirst < tC.GetFirstSize(); ++ifirst) {
615
616 Matrix_t A = tA.At(ifirst).GetMatrix();
617 Matrix_t B = tB.At(ifirst).GetMatrix();
618 Matrix_t C = tC.At(ifirst).GetMatrix();
619
620 // image boudaries
621 int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
622 int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
623 size_t currLocalView = 0;
624
625 // centers
626 for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
627 for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
628 // within local views
629 for (int m = 0; m < (Int_t)C.GetNrows(); m++) {
630 AFloat value = -std::numeric_limits<AFloat>::max();
631
632 for (int k = i - fltHeight / 2; k <= Int_t(i + (fltHeight - 1) / 2); k++) {
633 for (int l = j - fltWidth / 2; l <= Int_t(j + (fltWidth - 1) / 2); l++) {
634 if (C(m, k * imgWidth + l) > value) {
635 value = C(m, k * imgWidth + l);
636 B(m, currLocalView) = k * imgWidth + l;
637 }
638 }
639 }
640 A(m, currLocalView) = value;
641 }
642 currLocalView++;
643 }
644 }
645 }
646}
647
648//____________________________________________________________________________
649template <typename AFloat>
651 const TCpuTensor<AFloat> &activationGradients,
652 const TCpuTensor<AFloat> &indexMatrix,
653 const TCpuTensor<AFloat> & /*inputActivation*/,
654 const TCpuTensor<AFloat> & /*outputTensor*/,
655 const PoolingDescriptors_t & /*descriptors*/,
656 PoolingWorkspace_t & /*workspace*/,
657 size_t /* imgHeight */,
658 size_t /* imgWidth */,
659 size_t /* fltHeight */,
660 size_t /* fltWidth */,
661 size_t /* strideRows */,
662 size_t /* strideCols */,
663 size_t nLocalViews)
664{
665
666 assert( activationGradientsBackward.GetFirstSize() == activationGradients.GetFirstSize());
667 for (size_t l = 0; l < activationGradients.GetFirstSize(); ++l) {
668
669 Matrix_t activationGradientsBackward_m = activationGradientsBackward.At(l).GetMatrix();
670 Matrix_t activationGradients_m = activationGradients.At(l).GetMatrix();
671 Matrix_t indexMatrix_m = indexMatrix.At(l).GetMatrix();
672
673 size_t depth = activationGradientsBackward_m.GetNrows();
674
675 for (size_t j = 0; j < depth; j++) {
676 // initialize to zeros
677 for (size_t t = 0; t < (size_t)activationGradientsBackward_m.GetNcols(); t++) {
678 activationGradientsBackward_m(j, t) = 0;
679 }
680
681 // set values
682 for (size_t k = 0; k < nLocalViews; k++) {
683 AFloat grad = activationGradients_m(j, k);
684 size_t winningIdx = indexMatrix_m(j, k);
685 activationGradientsBackward_m(j, winningIdx) += grad;
686 }
687 }
688 }
689}
690
691//____________________________________________________________________________
692template <typename AFloat>
694 // reshape tensor for batch norm layer according to normalization axis
695 // input x - output reshpe of X
696 if (axis == 1) {
697 // reshape to a RowMajor tensor so I can use same indices, in this case channel
698 typename TCpuTensor<AFloat>::Shape_t newShape = { x.GetSize() / x.GetShape().front() , x.GetShape().front() }; // shape is HXWXB , C
699 TCpuTensor<AFloat> xtmp(x.GetDeviceBuffer(), newShape, TCpuTensor<AFloat>::MemoryLayout::RowMajor);
700 return xtmp;
701 }
702 // dense layer case (axis == -1)
703 return x.Reshape( { x.GetShape().front(), x.GetSize()/ x.GetShape().front()});
704 // what to do with time layer ?
705}
706
707//____________________________________________________________________________
708template <typename AFloat>
712 Matrix_t & mean,
713 Matrix_t & variance,
714 Matrix_t & iVariance,
715 Matrix_t & runningMeans,
716 Matrix_t & runningVars,
717 Scalar_t nTrainedBatches,
718 Scalar_t momentum, Scalar_t epsilon,
719 const TensorDescriptor_t & )
720 //BNormWorkspace_t * workspace )
721{
722 // the tensor are reshaped in order to
723 // have a first coordinate the normalized coordinates and second the feature we are computing the norm
724 // e.g. batch size , number of features or
725 // B x H x W , C
726
727 TCpuTensor<AFloat> input = BatchNormLayerReshapeTensor(axis,x);
728 TCpuTensor<AFloat> output = BatchNormLayerReshapeTensor(axis,y);
729
730 assert (input.GetShape().size() == 2);
731 size_t n = input.GetShape()[0]; // size of coordinates we are normalizing (e.g batch size)
732 size_t d = input.GetShape()[1]; // size of the coordinate we are not normalizing (e.g. feature size)
733
734 TCpuBuffer<AFloat> &inputBuffer = input.GetDeviceBuffer();
735 TCpuBuffer<AFloat> &outputBuffer = output.GetDeviceBuffer();
736
737
738 // lambda implementing computation for each single component k we need to normalize
739 auto f = [&] (size_t k)
740 {
741
742 auto inputK = inputBuffer.GetSubBuffer(k * n, n);
743 auto outputK = outputBuffer.GetSubBuffer(k * n, n);
744
745 double meanK = 0;
746 meanK = 0;
747 for (size_t i = 0; i < n; i++) {
748 AFloat xi = inputK[i];
749 meanK += xi;
750 }
751 meanK = meanK/ n;
752
753 double sq = 0;
754 for (size_t i = 0; i < n; i++) {
755 AFloat xi = inputK[i];
756 double xmu = xi - meanK;
757 sq = sq + (xmu * xmu);
758 outputK[i] = AFloat(xmu);
759 }
760 mean(0,k) = meanK;
761 variance(0,k) = sq / n;
762 iVariance(0,k) = 1. / std::sqrt(variance(0,k) + epsilon);
763
764 double iVK = iVariance(0, k);
765 double gK = gamma(0, k);
766 double bK = beta(0, k);
767 for (size_t i = 0; i < n; i++) {
768 AFloat yi = outputK[i] ;
769 outputK[i] = AFloat( gK * iVK * yi + bK );
770 }
771
772
773 // fVar(0,k) -= epsilon;
774
775 if (nTrainedBatches == 0) {
776 runningMeans(0,k) = mean(0,k);
777 runningVars(0,k) = variance(0,k) * (n) / (Scalar_t(n - 1) + epsilon);
778 } else {
779 double decay = momentum;
780 if (momentum < 0) decay = nTrainedBatches/Scalar_t(nTrainedBatches+1);
781 runningMeans(0,k) = decay * runningMeans(0,k) + (1. - decay) * mean(0,k);
782 runningVars(0,k) = decay * runningVars(0,k) + (1.-decay) * variance(0,k) * (n) / (Scalar_t(n - 1) + epsilon);
783 }
784 // std::cout << " training batch " << nTrainedBatches << " estimated mu : " << runningMeans(0, k)
785 // << " estimated var " << runningVars(0,k) << std::endl;
786
787 }; // end f(k) definition
788
790}
791
792//____________________________________________________________________________
793template <typename AFloat>
795 Matrix_t & gamma,
796 Matrix_t & beta,
798 const Matrix_t & runningMeans,
799 const Matrix_t & runningVars,
801 const TensorDescriptor_t & )
802{
803 TCpuTensor<AFloat> input = BatchNormLayerReshapeTensor(axis,x);
804 TCpuTensor<AFloat> output = BatchNormLayerReshapeTensor(axis,y);
805
806 assert (input.GetShape().size() == 2);
807 size_t n = input.GetShape()[0]; // size of coordinates we are normalizing (e.g batch size)
808 size_t d = input.GetShape()[1];
809
810 TCpuBuffer<AFloat> &inputBuffer = input.GetDeviceBuffer();
811 TCpuBuffer<AFloat> &outputBuffer = output.GetDeviceBuffer();
812
813 auto f = [&] (size_t k) {
814
815 auto inputK = inputBuffer.GetSubBuffer(k * n, n);
816 auto outputK = outputBuffer.GetSubBuffer(k * n, n);
817
818 double gK = gamma(0, k);
819 double bK = beta(0, k);
820 double mK = runningMeans(0, k);
821 double vK = 1. / (sqrt(runningVars(0, k) + epsilon));
822
823 // during inference just use stored mu and variance
824 for (size_t i = 0; i < n; i++) {
825 AFloat xi = inputK[i];
826 outputK[i] = AFloat( gK * (xi - mK) * vK + bK );
827 }
828 }; // end definition of f(k)
829
831}
832
833//____________________________________________________________________________
834template <typename AFloat>
836 const TCpuTensor<AFloat> &dy,
838 Matrix_t &gamma, // Matrix_t &beta, (not needed)
839 Matrix_t &dgamma, Matrix_t &dbeta,
840 const Matrix_t & mean,
841 const Matrix_t & variance,
842 const Matrix_t & iVariance,
844 const TensorDescriptor_t & )
845{
846 TCpuTensor<AFloat> input = BatchNormLayerReshapeTensor(axis,x);
847 TCpuTensor<AFloat> inputGrad = BatchNormLayerReshapeTensor(axis,dx);
848 TCpuTensor<AFloat> outputGrad = BatchNormLayerReshapeTensor(axis,dy);
849
850 assert (outputGrad.GetShape().size() == 2);
851 size_t n = outputGrad.GetShape()[0]; // size of coordinates we are normalizing (e.g batch size)
852 size_t d = outputGrad.GetShape()[1];
853
854 TCpuBuffer<AFloat> & inputBuffer = input.GetDeviceBuffer();
855 TCpuBuffer<AFloat> & dyBuffer = outputGrad.GetDeviceBuffer();
856 TCpuBuffer<AFloat> & dxBuffer = inputGrad.GetDeviceBuffer();
857
858
859 // compute first gradients for gamma and beta
860 auto f = [&] (size_t k) {
861 dgamma(0, k) = 0;
862 dbeta(0, k) = 0;
863 auto inputK = inputBuffer.GetSubBuffer(k * n, n);
864 auto outputGradK = dyBuffer.GetSubBuffer(k * n, n);
865 auto inputGradK = dxBuffer.GetSubBuffer(k * n, n);
866 auto meanK = mean(0, k);
867 for (size_t i = 0; i < n; i++) {
868 AFloat xi = inputK[i];
869 double xhat = xi - meanK;
870 dbeta(0, k) += outputGradK[i];
871 dgamma(0, k) += outputGradK[i] * xhat;
872 }
873 double npSumDy = dbeta(0, k);
874 double npSumDyHMu = dgamma(0, k);
875 dgamma(0, k) *= iVariance(0, k);
876
877 // compute gradients with respect to input
878 double bterm = npSumDyHMu / (variance(0, k) + epsilon);
879 double aterm = (1. / double(n) * gamma(0, k) * iVariance(0, k));
880 for (size_t i = 0; i < n; i++) {
881 AFloat xi = inputK[i];
882 AFloat dyi = outputGradK[i];
883 double xmu = xi - meanK;
884 inputGradK[i] = AFloat( aterm * (n * dyi - npSumDy - xmu * bterm) );
885 }
886 };
887
889}
890
891//____________________________________________________________________________
892template <typename AFloat>
894{
895 size_t nColsA = A.GetNcols();
896 size_t nColsB = B.GetNcols();
897
898 for (size_t i = 0; i < A.GetNrows(); i++) {
899 for (size_t j = 0; j < A.GetNcols(); j++) {
900 size_t nElem = i * nColsA + j;
901 A(i, j) = B(nElem / nColsB, nElem % nColsB);
902 }
903 }
904}
905
906//____________________________________________________________________________
907template <typename AFloat>
909{
910
911 //printf ( "input tensor %f \n",B(0,0,0));
912 //std::cout << "Flatten CPU arch " << std::endl;
913
914 assert( B.GetShape().size() == 3 );
915 assert( A.GetShape().size() == 3 );
916
917
918 size_t bsize = B.GetFirstSize();
919 size_t nRows = B.GetHSize();
920 size_t nCols = B.GetWSize();
921
922 assert ( A.GetFirstSize() == 1);
923 assert ( A.GetHSize() == bsize);
924 assert ( A.GetWSize() == nRows*nCols);
925
926 for (size_t i = 0; i < bsize; i++) {
927 for (size_t j = 0; j < nRows; j++) {
928 for (size_t k = 0; k < nCols; k++) {
929 A( 0, i, j * nCols + k) = B(i, j, k);
930 }
931 }
932 }
933
934 // size_t bsize = B.GetFirstSize();
935 // size_t n = B.GetSize()/bsize;
936 // if (B.GetLayout() == TCpuTensor<AFloat>::MemoryLayout::ColumnMajor ) {
937
938 // }
939 // A = B.Reshape(bsize, n)
940}
941
942//____________________________________________________________________________
943template <typename AFloat>
945{
946
947 assert( B.GetShape().size() == 3 );
948 assert( A.GetShape().size() == 3 );
949
950 size_t size = A.GetFirstSize();
951 size_t nRows = A.GetHSize();
952 size_t nCols = A.GetWSize();
953
954 assert ( B.GetFirstSize() == 1);
955 assert ( B.GetHSize() == size);
956 assert ( B.GetWSize() == nRows*nCols);
957 for (size_t i = 0; i < (size_t)size; i++) {
958 for (size_t j = 0; j < (size_t)nRows; j++) {
959 for (size_t k = 0; k < (size_t)nCols; k++) {
960 A(i, j, k) = B(0, i, j * nCols + k);
961 }
962 }
963 }
964}
965
966//______________________________________________________________________________
967template <typename AFloat>
969{
970 // B x T x D out --- T x B x D in*/
971 assert ( out.GetShape().size() == 3 && in.GetShape().size() == 3);
972
973
974 size_t B = out.GetFirstSize();
975 size_t T = out.GetCSize(); //1 for row-major
976 size_t D = out.GetWSize(); // 2 for row-major
977 if ((T != in.GetFirstSize()) || (B != in.GetCSize()) || (D != in.GetWSize()) ) {
978 std::cout << "Incompatible Dimensions\n"
979 << in.GetFirstSize() << "x" << in.GetCSize() << "x" << in.GetWSize() << " --> " << B << "x" << T << "x"
980 << D << "\n";
981 assert(false);
982 return;
983 }
984 for (size_t i = 0; i < B; ++i) {
985 for (size_t j = 0; j < T; ++j) {
986 for (size_t k = 0; k < D; ++k) {
987 out( i, j, k ) = in( j, i, k);
988 }
989 }
990 }
991 return;
992}
993
994} // namespace DNN
995} // namespace TMVA
int Int_t
Definition: CPyCppyy.h:43
unsigned int UInt_t
Definition: CPyCppyy.h:44
double
Definition: Converters.cxx:939
#define d(i)
Definition: RSha256.hxx:102
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:118
double floor(double)
double sqrt(double)
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
TCpuBuffer GetSubBuffer(size_t offset, size_t start) const
Return sub-buffer of size start starting at element offset.
Definition: CpuBuffer.cxx:47
The TCpuMatrix class.
Definition: CpuMatrix.h:86
size_t GetNcols() const
Definition: CpuMatrix.h:156
static size_t GetOnePointerSize()
Definition: CpuMatrix.h:112
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:269
static Executor & GetThreadExecutor()
Definition: CpuMatrix.h:169
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:191
size_t GetNrows() const
Definition: CpuMatrix.h:155
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:166
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:98
static const AFloat * GetOnePointer()
Definition: CpuMatrix.h:110
size_t GetNoElements() const
Definition: CpuMatrix.h:157
size_t GetWSize() const
Definition: CpuTensor.h:177
const TCpuBuffer< AFloat > & GetDeviceBuffer() const
Definition: CpuTensor.h:145
size_t GetCSize() const
Definition: CpuTensor.h:161
size_t GetFirstSize() const
Definition: CpuTensor.h:155
TCpuTensor< AFloat > At(size_t i)
Definition: CpuTensor.h:221
TCpuMatrix< AFloat > GetMatrix() const
Definition: CpuTensor.h:197
typename TMVA::Experimental::RTensor< AFloat >::Shape_t Shape_t
Definition: CpuTensor.h:47
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor B.
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
AReal Scalar_t
Definition: Cpu.h:69
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrage data accoring to time fill B x T x D out with T x B x D matrix in.
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of \pweights and write the results into output.
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static Tensor_t BatchNormLayerReshapeTensor(int axis, const Tensor_t &x)
static void Im2colIndices(std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A.
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.hxx:30
static void AddConvBiases(TMatrixT< AReal > &output, const TMatrixT< AReal > &biases)
Add the biases in the Convolutional Layer.
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:111
std::size_t GetSize() const
Definition: RTensor.hxx:241
const Shape_t & GetShape() const
Definition: RTensor.hxx:242
TMatrixT.
Definition: TMatrixT.h:39
double beta(double x, double y)
Calculates the beta function.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
static double B[]
static double A[]
static double C[]
double gamma(double x)
double T(double x)
Definition: ChebyshevPol.h:34
void Copy(void *source, void *dest)
void Error(const char *location, const char *va_(fmt),...)
Definition: TClingUtils.h:789
void Fatal(const char *location, const char *va_(fmt),...)
Definition: TClingUtils.h:829
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const AReal *alpha, const AReal *A, const int *lda, const AReal *B, const int *ldb, const AReal *beta, AReal *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
void Ger(const int *m, const int *n, const AReal *alpha, const AReal *x, const int *incx, const AReal *y, const int *incy, AReal *A, const int *lda)
Add the outer product of x and y to the matrix A.
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
create variable transformations
size_t strideRows
The number of row pixels to slid the filter each step.
Definition: ConvLayer.h:57
size_t filterHeight
The height of the filter.
Definition: ConvLayer.h:54
size_t inputHeight
The height of the previous layer or input.
Definition: ConvLayer.h:50
size_t paddingWidth
The number of zero layers left and right of the input.
Definition: ConvLayer.h:60
size_t filterWidth
The width of the filter.
Definition: ConvLayer.h:55
size_t paddingHeight
The number of zero layers added top and bottom of the input.
Definition: ConvLayer.h:59
size_t inputWidth
The width of the previous layer or input.
Definition: ConvLayer.h:51
size_t inputDepth
The depth of the previous layer or input.
Definition: ConvLayer.h:49
size_t strideCols
The number of column pixels to slid the filter each step.
Definition: ConvLayer.h:58
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
auto * a
Definition: textangle.C:12
static uint64_t sum(uint64_t i)
Definition: Factory.cxx:2345
REAL epsilon
Definition: triangle.c:618
static void output(int code)
Definition: gifencode.c:226