ROperator_ConvTranspose.icc
#ifndef TMVA_SOFIE_ROPERATOR_CONVTRANSPOSE_I
#define TMVA_SOFIE_ROPERATOR_CONVTRANSPOSE_I

#include <memory>
#include <sstream>
#include <algorithm>
#include <stdexcept>
#include <vector>
#include <cassert>

#include <TMVA/SOFIE_common.hxx>

namespace TMVA {
namespace Experimental {
namespace SOFIE {

template <typename T>
auto ROperator_ConvTranspose<T>::ShapeInference(std::vector<std::vector<size_t>> input)
   -> std::vector<std::vector<size_t>>
{
   const std::vector<size_t> &inputShape = input[0];
   const std::vector<size_t> &weightShape = input[1];
   size_t size = inputShape.size();
   // Dimension of the conv transpose op
   fDim = size - 2;
   // Number of groups
   if (fAttrGroup == 0)
      fAttrGroup = 1;
   if (fAttrStrides.empty()) {
      fAttrStrides = std::vector<size_t>(fDim, 1);
   }
   if (fAttrDilations.empty()) {
      fAttrDilations = std::vector<size_t>(fDim, 1);
   }
   // The shape of the kernel is kw for a 1d image, kh x kw for a 2d image and kd x kh x kw for a 3d image
   if (fAttrKernelShape.empty()) {
      fAttrKernelShape.resize(fDim);
      for (size_t i = 0; i < fDim; i++)
         fAttrKernelShape[i] = fShapeW[i + 2] + (fAttrDilations[i] - 1) * (fShapeW[i + 2] - 1);
   }
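   // Note: with dilation d a kernel of size k covers an effective extent of
   // k + (d - 1) * (k - 1) = d * (k - 1) + 1 pixels, e.g. k = 3, d = 2 -> 5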
   if (fAttrOutputPadding.empty())
      fAttrOutputPadding = std::vector<size_t>(fDim, 0);

   // The shape of the output is batch_size x out_channel x out_w for a 1d image,
   // batch_size x out_channel x out_h x out_w for a 2d image and
   // batch_size x out_channel x out_d x out_h x out_w for a 3d image,
   // where out_channel = weight_shape[1] * group
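   // For each spatial dimension i the output size follows the ONNX ConvTranspose formula
   //   out[i] = stride[i] * (in[i] - 1) + dilated_kernel[i] + output_padding[i] - pads[i] - pads[fDim + i]
   // e.g. in = 4, stride = 2, dilated kernel = 3, no padding -> out = 2 * 3 + 3 = 9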
   std::vector<size_t> outShape(size);
   outShape[0] = inputShape[0];
   outShape[1] = weightShape[1] * fAttrGroup;

   // Generate the padding
   if (fAttrPads.empty()) {
      fAttrPads = std::vector<size_t>(2 * fDim, 0);
      if (fAttrOutputShape.size() == fDim) {
         // LM: to be checked...
         // not supported for the time being
         throw std::runtime_error("ConvTranspose with output_shape explicitly set not yet supported.");
         /*
         std::vector<size_t> totalPadding(fDim, 1);
         for (size_t i = 0; i < fDim; i++) {
            size_t j = i + 2;
            totalPadding[i] =
               fAttrStrides[i] * (fAttrOutputShape[i] - 1) + fAttrOutputPadding[i] + fAttrKernelShape[i] - fShapeX[j];
         }

         for (size_t i = 0; i < fDim; i++) {
            size_t end_i = i + fDim;
            if (fAttrAutopad == "SAME_UPPER") {
               fAttrPads[i] = totalPadding[i] / 2;
               fAttrPads[end_i] = totalPadding[i] - fAttrPads[i];
            } else {
               fAttrPads[end_i] = totalPadding[i] / 2;
               fAttrPads[i] = totalPadding[i] - fAttrPads[end_i];
            }
         }
         */
      }
      if (fAttrAutopad != "NOTSET") {
         throw std::runtime_error("ConvTranspose with padding SAME_UPPER or SAME_LOWER is not supported");
      }
   }
   if (fAttrOutputShape.empty()) {
      fAttrOutputShape.resize(fDim);
      for (size_t i = 0; i < fDim; i++) {
         size_t j = i + 2;
         fAttrOutputShape[i] = fAttrStrides[i] * (inputShape[j] - 1) + fAttrKernelShape[i] + fAttrOutputPadding[i] - fAttrPads[i] - fAttrPads[fDim + i];
      }
   } else {
      // The shape of the output is explicitly set
      // TODO Generate the padding from the output shape and the input shape
      throw std::runtime_error("ConvTranspose with output_shape explicitly set not yet supported.");
   }

   for (size_t i = 0; i < fDim; i++)
      outShape[i + 2] = fAttrOutputShape[i];
   std::vector<std::vector<size_t>> ret({outShape});
   return ret;
}

template <typename T>
void ROperator_ConvTranspose<T>::Initialize(RModel &model)
{
   fUseSession = model.UseSession();
   if (!model.CheckIfTensorAlreadyExist(fNX)) {
      throw std::runtime_error("TMVA SOFIE Conv Transpose op Input Tensor " + fNX + " is not found in model");
   }
   fShapeX = model.GetTensorShape(fNX);
   if (fShapeX.size() < 3 || fShapeX.size() > 5) {
      std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl;
      throw std::runtime_error("TMVA SOFIE Conv Transpose Op input data tensor " + fNX +
                               " is not of 3, 4 or 5 dimensions");
   }
   fDim = fShapeX.size() - 2;
   if (!model.CheckIfTensorAlreadyExist(fNW)) {
      throw std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model");
   }
   fShapeW = model.GetTensorShape(fNW);
   if (fShapeW.size() < 3 || fShapeW.size() > 5) {
      std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;
      throw std::runtime_error("TMVA SOFIE Conv Transpose Op input weight tensor " + fNW +
                               " is not of 3, 4 or 5 dimensions");
   }
   fShapeY = ShapeInference({fShapeX, fShapeW})[0];

   model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
   if (fNB != "") {
      if (!model.CheckIfTensorAlreadyExist(fNB)) {
         throw std::runtime_error("TMVA SOFIE ConvTrans op Input Tensor " + fNB + " is not found in model");
      }
      fShapeB = model.GetTensorShape(fNB);
      if (fShapeB.size() < 1)
         throw std::runtime_error("TMVA SOFIE ConvTrans op: Bias Tensor has empty shape");

      size_t bsize = ConvertShapeToLength(fShapeB);
      size_t ysize = ConvertShapeToLength(fShapeY);
      // broadcasting is needed if the total size of B differs from that of Y
      bool broadcast_needed = (bsize != ysize);
      // Broadcast the bias B
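      // e.g. a bias of shape {C} is tiled to the full output shape {N, C, Od, Oh, Ow},
      // repeating each channel value over the batch and spatial dimensions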
      if (broadcast_needed) {
         // we assume the bias tensor size is equal to the number of filters, which is the second dimension of
         // the output tensor
         if (bsize != fShapeY[1])
            throw std::runtime_error("TMVA SOFIE ConvTrans op: Bias Tensor has wrong shape: " +
                                     ConvertShapeToString(fShapeB));

         auto original_data = model.GetInitializedTensorData(fNB);

         if (fType != "float")
            throw std::runtime_error("TMVA SOFIE ConvTrans op: Broadcasting for non-float type tensors is not supported");
         // here the actual broadcasting
         if (!fUseSession) {
            // Broadcast B from M to N x M x Od x Oh x Ow
            std::shared_ptr<void> new_data_ptr(
               UTILITY::BroadcastConvBias<float>(static_cast<float *>(original_data.get()), bsize, fShapeY),
               std::default_delete<float[]>());

            model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), fShapeY, new_data_ptr);
            fShapeB = model.GetTensorShape(fNB);
            fNBroadcastedB = fNB; // use the same name
         } else {
            // In case of session, add the broadcasting code in the Session constructor and in GenerateInitCode:
            // we need to add a new intermediate tensor for the broadcasted bias tensor
            fNBroadcastedB = "Broadcasted" + fNB;
            model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY);
         }
      } else {
         // the bias tensor has already the correct shape, no need to broadcast
         if (fShapeY != fShapeB)
            throw std::runtime_error("TMVA SOFIE ConvTrans op: Broadcasting is not needed but bias has wrong shape " +
                                     ConvertShapeToString(fShapeB));
         fNBroadcastedB = fNB;
      }
   }
}

template <typename T>
std::string ROperator_ConvTranspose<T>::GenerateInitCode()
{
   std::stringstream out;
   // generate initialization code for the broadcasting of the bias tensor
   size_t bsize = ConvertShapeToLength(fShapeB);
   size_t ysize = ConvertShapeToLength(fShapeY);
   if (bsize != ysize && !fNBroadcastedB.empty()) {
      // use a separate scope to avoid name clashes with other operators' temporary variables
      out << SP << "{\n";
      out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::BroadcastConvBias<float>(tensor_"
          << fNB << ", " << bsize << ", " << ConvertShapeToString(fShapeY) << ");\n";
      out << SP << SP << "std::copy(data, data + " << ConvertShapeToLength(fShapeY) << ", tensor_" << fNBroadcastedB << ");\n";
      out << SP << SP << "delete[] data;\n";
      out << SP << "}\n";
   }
   return out.str();
}

// generate code for Session data members (e.g. internal vectors)
template <typename T>
std::string ROperator_ConvTranspose<T>::GenerateSessionMembersCode(std::string opName)
{
   // size_t outputChannelSize = fShapeY[1];
   size_t kernelSize = 1;
   size_t inputSize = 1;
   for (size_t i = 0; i < fDim; i++) {
      inputSize *= fShapeX[2 + i];
      kernelSize *= fAttrKernelShape[i];
   }

   opName = "op_" + opName;
   std::stringstream out;
   // matrix with the convolution kernels
   out << "std::vector<" << fType << "> fVec_" << opName << "_f = std::vector<" << fType << ">("
       << fShapeW[0] * fShapeW[1] * kernelSize << ");\n";
   // output matrix of the GEMM, folded back into the output image by col2im
   out << "std::vector<" << fType << "> fVec_" << opName << "_xcol = std::vector<" << fType << ">("
       << kernelSize * fShapeW[1] * inputSize << ");\n"; // kernel size * output channels * input spatial size
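   // e.g. in the 2d case with W of shape {C_in, C_out, kh, kw}, fVec_f holds the
   // C_in x (C_out * kh * kw) kernel matrix and fVec_xcol the (C_out * kh * kw) x (ih * iw)
   // GEMM result that col2im later folds into the output image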
   out << "\n";

   return out.str();
}

template <typename T>
std::string ROperator_ConvTranspose<T>::Generate(std::string OpName)
{
   OpName = "op_" + OpName;

   if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {
      throw std::runtime_error("TMVA SOFIE Conv Transpose Op called to Generate without being initialized first");
   }

   std::stringstream out;

   size_t bsize = fShapeX[0];
   size_t kDepth = (fDim > 2) ? fShapeW[2] : 1;     // kernel depth
   size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; // kernel height
   size_t kWidth = fShapeW[fDim + 1];               // kernel width

   size_t iDepth = (fDim > 2) ? fShapeX[2] : 1;     // input depth
   size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1; // input height
   size_t iWidth = fShapeX[fDim + 1];               // input width

   size_t oDepth = (fDim > 2) ? fShapeY[2] : 1;     // output depth
   size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1; // output height
   size_t oWidth = fShapeY[fDim + 1];               // output width

   out << "\n//---- operator ConvTranspose " << OpName << "\n";

   // create the matrix with the convolution kernels
   if (fUseSession)
      out << SP << fType << " * " << OpName << "_f = fVec_" << OpName << "_f.data();\n";
   else {
      size_t kernelSize = fAttrKernelShape[0];
      if (fDim > 1)
         kernelSize *= fAttrKernelShape[1];
      out << SP << fType << " " << OpName << "_f[" << fShapeW[0] * fShapeW[1] * kernelSize << "] = {0};\n";
   }

   // vectorize the (dilated) convolution kernels into a matrix
   // The shape of the kernel is W for a 1d image, H x W for a 2d image and D x H x W
   // for a 3d image
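   // id/ih/iw are the positions of the depth/height/width axes inside the attribute
   // vectors; iw is always the last spatial axis, and id/ih get dummy values when the
   // corresponding dimension is absent (they are then never used, see the guards below)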
   size_t id = (fDim > 2) ? fDim - 3 : 2;
   size_t ih = (fDim > 1) ? fDim - 2 : 1;
   size_t iw = fDim - 1;
   size_t wstrideDil = fAttrDilations[iw];
   size_t hstride = kWidth;
   // stride dilated in the height
   size_t hstrideDil = fAttrKernelShape[iw];
   if (fDim > 1)
      hstrideDil *= fAttrDilations[ih];
   size_t dstride = kHeight * kWidth;
   size_t dstrideDil = fAttrKernelShape[iw];
   if (fDim > 1)
      dstrideDil *= fAttrKernelShape[ih];
   if (fDim > 2)
      dstrideDil *= fAttrDilations[id];
   size_t icstride = kHeight * kWidth * kDepth;
   size_t icstrideDil = 1;
   for (size_t i = 0; i < fDim; i++)
      icstrideDil *= fAttrKernelShape[i];
   size_t ocstride = fShapeW[1] * icstride;
   size_t ocstrideDil = fShapeW[1] * icstrideDil;

   // The f matrix has shape [C_in, (C_out / group) * dilated kernel volume]
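   // e.g. a 2x2 kernel [a b; c d] with dilation 2 is scattered into a 3x3 block
   // [a 0 b; 0 0 0; c 0 d] of f (the zeros come from the zero-initialization above)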
   out << SP << "for (std::size_t ic = 0; ic < " << fShapeW[0] << "; ic++) {\n";
   out << SP << SP << "for (std::size_t oc = 0; oc < " << fShapeW[1] << "; oc++) {\n";
   // out << SP << SP << SP << "size_t kIndex = 0;\n"; // filter index
   if (fDim > 2)
      out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";
   if (fDim > 1)
      out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";
   out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";

   out << SP << SP << SP << SP << SP << OpName << "_f[ic * " << ocstrideDil << " + oc * " << icstrideDil;
   if (fDim > 2)
      out << " + kd * " << dstrideDil;
   if (fDim > 1)
      out << " + kh * " << hstrideDil;
   out << " + kw * " << wstrideDil << " ] = tensor_" << fNW << "[ic * " << ocstride << " + oc * " << icstride;

   if (fDim > 2)
      out << " + kd * " << dstride;
   if (fDim > 1)
      out << " + kh * " << hstride;
   out << " + kw ];\n";

   // here we would rotate the input kernel, transforming 0,1,2,...,N-1 into N-1,N-2,...,2,1,0
   // out << " + " << icstride - 1 << " - kIndex ];\n"; // transform 1,2,3,4 into 4,3,2,1
   // out << SP << SP << SP << SP << SP << "kIndex++;\n"; // update the input filter index

   out << SP << SP << SP << SP << "}\n";
   if (fDim > 1)
      out << SP << SP << SP << "}\n";
   if (fDim > 2)
      out << SP << SP << SP << "}\n";

   out << SP << SP << "}\n";
   out << SP << "}\n";

   out << SP << "char " << OpName << "_transA = 'N';\n";
   out << SP << "char " << OpName << "_transB = 'T';\n";
   out << SP << "int " << OpName << "_m = " << iHeight * iWidth * iDepth << ";\n";
   out << SP << "int " << OpName << "_n = " << icstrideDil * fShapeW[1] << ";\n"; // output channels * kernel volume
   out << SP << "int " << OpName << "_k = " << fShapeW[0] << ";\n"; // input channels
   out << SP << "float " << OpName << "_alpha = 1.0;\n";
   out << SP << "float " << OpName << "_beta = 0.0;\n";
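   // The generated GEMM computes, for each batch entry, xcol = f^T * x :
   // the (C_in x C_out * kvol) kernel matrix times the (C_in x spatial) input block,
   // giving the (C_out * kvol x spatial) column matrix that col2im folds into the output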

   if (fUseSession) {
      out << SP << fType << " * " << OpName << "_xcol = fVec_" << OpName << "_xcol.data();\n";
   } else {
      out << SP << fType << " " << OpName << "_xcol[" << fShapeW[0] * icstrideDil * oDepth * oHeight * oWidth << "] = {0};\n";
   }

   // Loop on the batch size
   out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";

   // IM2COL: unroll the input tensor
   // order the input data as (e.g. for a 2x2 kernel, with (xa,ya) being channel 1 and (xb,yb) channel 2)
   // (xa1,..,xak,ya1,..yak)(xb1,...,xbk,yb1,..,ybk)
   // (xa2,...xak+1,ya1,...yak)(......)
   // the trick for speed is to use the caffe im2col and output a matrix which contains the filtered values as rows.
   // In this way one has consecutive memory reads and writes
   // The resulting matrix op_xcol is (output channels * filter_h * filter_w, output_h * output_w)
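   // Note that for the transposed convolution the order is reversed with respect to Conv:
   // the GEMM first produces op_xcol = f^T * x, and col2im then scatters (accumulating
   // overlapping contributions) the columns back into the output image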
   if (fDim == 1) {
      if (fAttrPads[0] != fAttrPads[1]) {
         std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. Assuming an average padding"
                   << std::endl;
         fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;
      }
      fAttrPads[1] = 0;
   }
   if (fDim == 2) {
      if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {
         std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. Assuming an average padding"
                   << std::endl;
         fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;
         fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;
      }
   }
   if (fDim == 3) {
      if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {
         std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. Assuming an average padding"
                   << std::endl;
         fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;
         fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;
         fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;
      }
   }
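   // e.g. 2d pads {top, left, bottom, right} = {0, 1, 2, 1} become the symmetric {1, 1}
   // (averaging (0 + 2) / 2 and (1 + 1) / 2), which only approximates the requested padding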

   if (fAttrGroup == 1) {
      out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iDepth * iHeight * iWidth << ";\n";
      out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";

      // do the BLAS call before col2im:
      // BLAS
      out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
          << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, "
          << "tensor_" << fNX << " + x_offset, &" << OpName << "_m,\n"; // use m if op_xcol is not transposed, otherwise k
      out << SP << SP << SP << OpName << "_f, &" << OpName << "_n, &" << OpName << "_beta, "
          << OpName << "_xcol, &" << OpName << "_m);\n";

      // as when using im2col, the resulting matrix is in the transposed format
      // (input_c * filter_h * filter_w, output_h * output_w),
      // so col2im is used to fold it back into the output image
      if (fDim < 3) {
         out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::col2im<float>(" << OpName << "_xcol,"
             // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
             // dilation_w,
             << fShapeY[1] << "," << oHeight << "," << oWidth << ",";
         if (fDim == 1)
            out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
                << fAttrDilations[0];
         else // fDim == 2
            out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
                << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
                << fAttrDilations[1];
         out << ", tensor_" << fNY << " + out_offset);\n\n ";
      } else {
         // 3d : a col2im for 3d is needed
         throw std::runtime_error("TMVA SOFIE 3D Conv Transpose not yet supported");
         out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
             << " + x_offset,"
             // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
             // dilation_d, dilation_h, dilation_w,
             //
             << fShapeX[1] << "," << oDepth << "," << oHeight << "," << oWidth << "," << fAttrKernelShape[0] << ","
             << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1] << ","
             << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
             << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << "," << OpName
             << "_xcol);\n\n ";
      }
      // // BLAS
      // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
      //     << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << OpName << "_xcol, &" << OpName
      //     << "_m,\n"; // use m if op_xcol is not transposed, otherwise k
      // out << SP << SP << SP << OpName << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
      //     << " + out_offset, &" << OpName << "_m);\n";
   } else {
      // case of group transposed convolution:
      // loop on the groups and repeat the operations (GEMM + col2im) for each group
      out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";
      out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iHeight * iWidth << " + g * "
          << fShapeX[1] * iHeight * iWidth / fAttrGroup << ";\n ";
      out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oHeight * oWidth << " + g * "
          << fShapeY[1] * oHeight * oWidth / fAttrGroup << ";\n ";

      // do the BLAS call here (LM: probably an offset for op_f, the kernels, is needed)
      out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
          << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, "
          << "tensor_" << fNX << " + x_offset, &" << OpName
          << "_m,\n"; // use m if op_xcol is not transposed, otherwise k
      out << SP << SP << SP << OpName << "_f, &" << OpName << "_n, &" << OpName
          << "_beta, " << OpName << "_xcol , &" << OpName << "_m);\n";

      if (fDim < 3) {
         out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::col2im<float>(" << OpName << "_xcol,"
             // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
             // dilation_w,
             << fShapeY[1] << "," << oHeight << "," << oWidth << ",";
         if (fDim == 1)
            out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
                << fAttrDilations[0];
         else // fDim == 2
            out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
                << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
                << fAttrDilations[1];
         out << ", tensor_" << fNY << " + out_offset);\n\n ";
      } else {
         // 3d : a col2im for 3d is needed
         throw std::runtime_error("TMVA SOFIE 3D Conv Transpose not yet supported");

         out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
             << " + x_offset,"
             // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
             // dilation_d, dilation_h, dilation_w,
             //
             << fShapeX[1] << "," << oDepth << "," << oHeight << "," << oWidth << "," << fAttrKernelShape[0] << ","
             << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1] << ","
             << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
             << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << "," << OpName
             << "_xcol);\n\n ";
      }

      // // BLAS
      // // the offset for g must be g * k * n
      // out << SP << SP << SP << "size_t offset_f = g * " << fShapeW[0] * fShapeW[1] * icstrideDil / fAttrGroup << ";\n";
      // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
      //     << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << OpName << "_xcol, &" << OpName
      //     << "_m,\n"; // use m if op_xcol is not transposed, otherwise k
      // out << SP << SP << SP << OpName << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
      //     << " + out_offset"
      //     << ", &" << OpName << "_m);\n";

      out << SP << SP << "}\n"; // end of group loop
   }

   out << SP << "}\n"; // end of batch size loop

   if (fNBroadcastedB != "") {
      out << SP << "int " << OpName << "_size = " << fShapeY[0] * fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
      out << SP << "float " << OpName << "_gamma = 1.0;\n";
      out << SP << "int " << OpName << "_incx = 1;\n";
      out << SP << "int " << OpName << "_incy = 1;\n";

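      // add the broadcasted bias to the whole output: Y += gamma * B (elementwise saxpy)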
      out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNBroadcastedB << ", &"
          << OpName << "_incx, tensor_" << fNY << ", &" << OpName << "_incy);\n";
   }

   return out.str();
}

} // namespace SOFIE
} // namespace Experimental
} // namespace TMVA

#endif