Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_ConvTranspose.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_CONVTRANSPOSE_HXX
2#define TMVA_SOFIE_ROPERATOR_CONVTRANSPOSE_HXX
3
5#include <TMVA/ROperator.hxx>
6#include <TMVA/RModel.hxx>
7
8#include <memory>
9#include <sstream>
10#include <algorithm>
11#include <stdexcept>
12#include <vector>
13#include <cassert>
14
16
17/*! \brief Transposed Convolution operator
18 *
19 * Inference code generation for a transposed convolution layer.
20 * See the <a href="https://github.com/onnx/onnx/blob/main/docs/Operators.md#convtranspose">ONNX documentation</a> for
21 * details about the transposed conv layer.
22 */
23template <typename T>
// NOTE(review): the class-declaration line (doxygen line 24) was dropped by the HTML
// extraction; presumably `class ROperator_ConvTranspose : public ROperator {` — confirm
// against the upstream ROOT sources before rebuilding from this copy.
25private:
 // ONNX ConvTranspose attributes (defaults are filled in by ShapeInference when empty/0)
26 std::string fAttrAutopad;
27 std::vector<size_t> fAttrDilations;
28 size_t fAttrGroup;
29 std::vector<size_t> fAttrKernelShape;
30 std::vector<size_t> fAttrOutputPadding;
31 std::vector<size_t> fAttrOutputShape;
32 std::vector<size_t> fAttrPads;
33 std::vector<size_t> fAttrStrides;
34
 // Cleaned tensor names: input X, weight W, bias B, broadcasted bias, output Y
35 std::string fNX;
36 std::string fNW;
37 std::string fNB;
38 std::string fNBroadcastedB;
39 std::string fNY;
40
 // Names of the intermediate tensors created in Initialize(): "<X>_f" (vectorized
 // kernel matrix) and "<X>_xcol" (im2col buffer)
41 std::string fConvK;
42 std::string fImcol;
43
 // Tensor shapes, filled in Initialize() from the model
44 std::vector<size_t> fShapeX;
45 std::vector<size_t> fShapeW;
46 std::vector<size_t> fShapeB;
47 std::vector<size_t> fShapeY;
48
 // Scalar type name used in the generated code (only "float" is supported, see ctor)
49 std::string fType;
50
51 size_t fDim; // dimension of the convolution
52
53public:
54 /*! Default constructor of ROperator_ConvTranspose */
 // NOTE(review): the defaulted-constructor line (doxygen line 55) was dropped by the
 // extraction — confirm upstream.
56
57 /*! \brief Constructor of ROperator_ConvTranspose from the attributes
58 *
59 * \param autopad padding
60 * \param dilations dilations of the kernel
61 * \param group number of groups
62 * \param kernelShape shape of the kernel
63 * \param outputPadding padding of the output
64 * \param outputShape shape of the output
65 * \param pads padding of the input
66 * \param strides strides
67 * \param nameX name of the input
68 * \param nameW name of the weight
69 * \param nameB name of the bias
70 * \param nameY name of the output
71 */
72 ROperator_ConvTranspose(std::string autopad, std::vector<size_t> dilations, size_t group,
73 std::vector<size_t> kernelShape, std::vector<size_t> outputPadding,
74 std::vector<size_t> outputShape, std::vector<size_t> pads, std::vector<size_t> strides,
75 std::string nameX, std::string nameW, std::string nameB, std::string nameY)
 // NOTE(review): the first member-initializer lines (doxygen lines 76-77, presumably
 // `: fAttrAutopad(autopad), fAttrDilations(dilations), ...`) were dropped by the
 // extraction — confirm upstream.
78 fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNB(UTILITY::Clean_name(nameB)),
79 fNY(UTILITY::Clean_name(nameY))
80 {
 // NOTE(review): doxygen lines 81-82 (presumably registering fNX/fNW in
 // fInputTensorNames) were dropped by the extraction — confirm upstream.
 // The bias input is optional: register it only when a name was given.
83 if (!fNB.empty()) {
84 fInputTensorNames.emplace_back(fNB);
85 }
86
 // Only float is supported for code generation; any other T is rejected at parse time.
87 if (std::is_same<T, float>::value) {
88 fType = "float";
89 } else {
90 throw std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
91 }
92 }
93
94 /*! \brief Infers the type of the output tensor
95 * \param input type of the input tensors
96 */
 // Output type is simply the type of the first input (X).
97 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override
98 {
99 ETensorType out = input[0];
100 return {out};
101 }
102
103 /*! \brief Infers the shape of the input tensors
104 * \param input shape of the input tensors
105 */
106 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> /*input*/) override;
107
108 /*! \brief Initialize the model
109 * \param model Model
110 */
111 void Initialize(RModel &) override;
112
113 /*! \brief Generate code for initializing the op
114 */
115 std::string GenerateInitCode() override;
116
117 /*! \brief Generate the inference code
118 * \param opName name of the operator
119 */
120 std::string Generate(std::string opName) override;
121
122 /*! \brief Returns the blas routines needed to compile the generated code
123 */
 // Generated code calls BLAS sgemm_ (kernel GEMM) and saxpy_ (bias addition).
124 std::vector<std::string> GetBlasRoutines() override { return { std::string("Gemm"), std::string("Axpy") }; }
125};
126
127template <typename T>
128auto ROperator_ConvTranspose<T>::ShapeInference(std::vector<std::vector<size_t>> input)
129 -> std::vector<std::vector<size_t>>
130{
131 const std::vector<size_t> &inputShape = input[0];
132 const std::vector<size_t> &weightShape = input[1];
133 size_t size = inputShape.size();
134 // Dimension of the conv transpose op
135 fDim = size - 2;
136 // Number of groups
137 if (fAttrGroup == 0)
138 fAttrGroup = 1;
139 if (fAttrStrides.empty()) {
140 fAttrStrides = std::vector<size_t>(fDim, 1);
141 }
142 if (fAttrDilations.empty()) {
143 fAttrDilations = std::vector<size_t>(fDim, 1);
144 }
145 // The shape of the kernel is kw for 1d image, kh x Kw for 2d images and kd x kh x kw for a 3d image
146 if (fAttrKernelShape.empty()) {
147 fAttrKernelShape.resize(fDim);
148 for (size_t i = 0; i < fDim; i++)
149 fAttrKernelShape[i] = fShapeW[i + 2] + (fAttrDilations[i] - 1) * (fShapeW[i + 2] - 1);
150 }
151 if (fAttrOutputPadding.empty())
152 fAttrOutputPadding = std::vector<size_t>(fDim, 0);
153
154 // The Shape of the output is batch_size x out_channel x out_w for a 1d image,
155 // batch_size x out_channel x out_h x out_w for a 2d image and
156 // batch_size x out_channel x out_d x out_h x out_w for a 3d image
157 // where out_channel = weight_shape[1] * group
158 std::vector<size_t> outShape(size);
159 outShape[0] = inputShape[0];
160 outShape[1] = weightShape[1] * fAttrGroup;
161
162 // Generate the padding
163 if (fAttrPads.empty()) {
164 fAttrPads = std::vector<size_t>(2 * fDim, 0);
165 if (fAttrOutputShape.size() == fDim) {
166 // LM: to be checked...
167 // for time being not support
168 throw std::runtime_error("ConvTranspose with output_shape explicitly set not yet supported.");
169 /*
170 std::vector<size_t> totalPadding(fDim, 1);
171 for (size_t i = 0; i < fDim; i++) {
172 size_t j = i + 2;
173 totalPadding[i] =
174 fAttrStrides[i] * (fAttrOutputShape[i] - 1) + fAttrOutputPadding[i] + fAttrKernelShape[i] - fShapeX[j];
175 }
176
177 for (size_t i = 0; i < fDim; i++) {
178 size_t end_i = i + fDim;
179 if (fAttrAutopad == "SAME_UPPER") {
180 fAttrPads[i] = totalPadding[i] / 2;
181 fAttrPads[end_i] = totalPadding[i] - fAttrPads[i];
182 } else {
183 fAttrPads[end_i] = totalPadding[i] / 2;
184 fAttrPads[i] = totalPadding[i] - fAttrPads[end_i];
185 }
186 }
187 */
188 }
189 if (fAttrAutopad != "NOTSET") {
190 throw std::runtime_error("ConvTranspose with padding SAME_UPPER or SMAE_LOWER not supported");
191 }
192 }
193 if (fAttrOutputShape.empty()) {
194 fAttrOutputShape.resize(fDim);
195 for (size_t i = 0; i < fDim; i++) {
196 size_t j = i + 2;
197 fAttrOutputShape[i] = fAttrStrides[i] * (inputShape[j] - 1) + fAttrKernelShape[i] + fAttrOutputPadding[i] -
198 fAttrPads[i] - fAttrPads[fDim + i];
199 }
200 } else {
201 // The shape of the output is explicitly set
202 // TODO Generate the padding from the output shape and the input shape
203 throw std::runtime_error("ConvTranspose with output_shape explicitly set not yet supported.");
204 }
205
206 for (size_t i = 0; i < fDim; i++)
207 outShape[i + 2] = fAttrOutputShape[i];
208 std::vector<std::vector<size_t>> ret({outShape});
209 return ret;
210}
211
212template <typename T>
// Initialize the operator from the model: validate the X/W/B tensors, run shape
// inference, register the output tensor Y and the two intermediate work tensors
// ("<X>_f" vectorized kernels and "<X>_xcol" im2col buffer), and arrange for the
// bias to be broadcast to the output shape when needed.
// NOTE(review): the signature line (doxygen line 213, presumably
// `void ROperator_ConvTranspose<T>::Initialize(RModel &model)`) was dropped by the
// extraction — the body below reads a parameter named `model`; confirm upstream.
214{
215
216 fUseSession = model.UseSession();
 // Input data tensor X must exist and be rank 3, 4 or 5 (1d/2d/3d convolution).
217 if (!model.CheckIfTensorAlreadyExist(fNX)) {
218 throw std::runtime_error("TMVA SOFIE Conv Transpose op Input Tensor " + fNX + " is not found in model");
219 }
220 fShapeX = model.GetTensorShape(fNX);
221 if (fShapeX.size() < 3 || fShapeX.size() > 5) {
222 std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl;
223 throw std::runtime_error("TMVA SOFIE Conv Transpose Op input data tensor" + fNX +
224 " is not of 3,4 or 5 dimensions");
225 }
226 fDim = fShapeX.size() - 2;
 // Weight tensor W must exist and have matching rank.
227 if (!model.CheckIfTensorAlreadyExist(fNW)) {
228 throw std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model");
229 }
230 fShapeW = model.GetTensorShape(fNW);
231 if (fShapeW.size() < 3 || fShapeW.size() > 5) {
232 std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;
233 throw std::runtime_error("TMVA SOFIE Conv Transpose Op input weight tensor" + fNW +
234 " is not of 3,4 or 5 dimensions");
235 }
 // ShapeInference also fills the defaulted attributes (strides, dilations, kernel
 // shape, pads, ...); it must run after fShapeX/fShapeW are set.
236 fShapeY = ShapeInference({fShapeX, fShapeW})[0];
237
238 model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
 // Optional bias handling: validate and, if its length differs from Y, broadcast it.
239 if (fNB != "") {
240 if (!model.CheckIfTensorAlreadyExist(fNB)) {
241 throw std::runtime_error("TMVA SOFIE ConvTrans op Input Tensor " + fNB + " is not found in model");
242 }
243 fShapeB = model.GetTensorShape(fNB);
244 if (fShapeB.size() < 1)
245 throw std::runtime_error("TMVA SOFIE ConvTrans op: Bias Tensor has empty shape");
246
247 size_t bsize = ConvertShapeToLength(fShapeB);
248 size_t ysize = ConvertShapeToLength(fShapeY);
249 // broadcasting is needed if first stride of B is not same of Y
250 bool broadcast_needed = (bsize != ysize);
251 // Broadcast the bias B
252 if (broadcast_needed) {
253 // we assume bias tensor size is equal to number of filters that is the second dimension in
254 // the output tensor
255 if (bsize != fShapeY[1])
256 throw std::runtime_error("TMVA SOFIE ConvTrans op: Bias Tensor has wrong shape: " +
257 ConvertShapeToString(fShapeB));
258
259 auto original_data = model.GetInitializedTensorData(fNB);
260
261 if (fType != "float")
262 throw std::runtime_error(
263 "TMVA SOFIE ConvTrans op: Broadcasting for non-float type tensors is not supported");
264 // here the actual broadcasting
265 if (!fUseSession) {
266 // Broadcast B from M to N x M x Od x Oh x Ow
267 std::shared_ptr<void> new_data_ptr(
268 UTILITY::BroadcastConvBias<float>(static_cast<float *>(original_data.get()), bsize, fShapeY),
269 std::default_delete<float[]>());
270
271 model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), fShapeY, new_data_ptr);
272 fShapeB = model.GetTensorShape(fNB);
273 fNBroadcastedB = fNB; // use same name
274 } else {
275 // In case of session add broadcasting code in Session constructor and in GenerateInitCode
276 // we need to add a new intermediate tensor for broadcasted bias tensor
277 fNBroadcastedB = "Broadcasted" + fNB;
278 model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY);
279 }
280 } else {
281 // bias tensor is already correct shape, no need to broadcast
282 if (fShapeY != fShapeB)
283 throw std::runtime_error("TMVA SOFIE ConvTrans op: Broadcasting is not needed but bias has wrong shape" +
284 ConvertShapeToString(fShapeB));
285 fNBroadcastedB = fNB;
286 }
287 }
288
 // Sizes of the per-sample input spatial volume and of the (dilated) kernel volume,
 // used to dimension the two intermediate work tensors below.
289 size_t kernelSize = 1;
290 size_t inputSize = 1;
291 for (size_t i = 0; i < fDim; i++) {
292 inputSize *= fShapeX[2 + i];
293 kernelSize *= fAttrKernelShape[i];
294 }
295
296 std::vector<size_t> shape1 = {fShapeW[0], fShapeW[1], kernelSize};
297 std::vector<size_t> shape2 = {fShapeW[1], kernelSize, inputSize};
298 model.AddIntermediateTensor(fNX + "_f", ConvertStringToType(fType), shape1);
299 model.AddIntermediateTensor(fNX + "_xcol", ConvertStringToType(fType), shape2);
300 fConvK = fNX + "_f";
301 fImcol = fNX + "_xcol";
302 fOutputTensorNames.emplace_back(fConvK);
303 fOutputTensorNames.emplace_back(fImcol);
304}
305
306template <typename T>
// Emit the session-constructor code that broadcasts the bias tensor B to the full
// output shape Y (only when Initialize() decided a broadcast is needed, i.e. the
// bias length differs from Y's length and a broadcasted-bias tensor was registered).
// Returns the code fragment as a string; empty when no broadcast is required.
// NOTE(review): the signature line (doxygen line 307, presumably
// `std::string ROperator_ConvTranspose<T>::GenerateInitCode()`) was dropped by the
// extraction — confirm upstream.
308{
309 std::stringstream out;
310 // generate initialization code for broadcasting of bias tensor
311 size_t bsize = ConvertShapeToLength(fShapeB);
312 size_t ysize = ConvertShapeToLength(fShapeY);
313 if (bsize != ysize && !fNBroadcastedB.empty()) {
314 // include a separate scope to avoid defining unique operator temp variables
315 out << SP << "{\n";
316 out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::BroadcastConvBias<float>(tensor_" << fNB
317 << ", " << bsize << ", " << ConvertShapeToString(fShapeY) << ");\n";
318 out << SP << SP << "std::copy(data, data + " << ConvertShapeToLength(fShapeY) << ", tensor_" << fNBroadcastedB
319 << ");\n";
 // BroadcastConvBias returns an owning array; the generated code must free it.
320 out << SP << SP << "delete[] data;\n";
321 out << SP << "}\n";
322 }
323 return out.str();
324}
325
326template <typename T>
// Generate the inference code for the transposed convolution: (1) vectorize the
// (dilated) kernels of W into the "<X>_f" matrix, (2) per batch sample run a BLAS
// sgemm of X against the kernel matrix into "<X>_xcol", (3) fold the columns back
// into the output image with col2im, and (4) add the (broadcasted) bias via saxpy.
// Returns the generated C++ code as a string.
// NOTE(review): the signature line (doxygen line 327, presumably
// `std::string ROperator_ConvTranspose<T>::Generate(std::string OpName)`) was dropped
// by the extraction — the body below reads and reassigns a parameter `OpName`;
// confirm upstream.
328{
329 OpName = "op_" + OpName;
330
 // All shapes must have been filled by Initialize() before code generation.
331 if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {
332 throw std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first");
333 }
334
335 std::stringstream out;
336
 // Kernel / input / output extents; missing dimensions collapse to 1 so the same
 // code handles the 1d, 2d and 3d cases.
337 size_t bsize = fShapeX[0];
338 size_t kDepth = (fDim > 2) ? fShapeW[2] : 1; // kernel depth
339 size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; // kernel height
340 size_t kWidth = fShapeW[fDim + 1]; // kernel width
341
342 size_t iDepth = (fDim > 2) ? fShapeX[2] : 1; // input depth
343 size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1; // input height
344 size_t iWidth = fShapeX[fDim + 1]; // input width
345
346 size_t oDepth = (fDim > 2) ? fShapeY[2] : 1; // output depth
347 size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1; // output height
348 size_t oWidth = fShapeY[fDim + 1]; // output width
349
350 out << "\n//---- operator ConvTranspose " << OpName << "\n";
351
352 // create first matrix with convolution kernels
 // Without a session the work buffers are emitted as local arrays in the generated code.
353 if (!fUseSession) {
354 size_t kernelSize = fAttrKernelShape[0];
355 if (fDim > 1)
356 kernelSize *= fAttrKernelShape[1];
357 out << SP << fType << " tensor_" << fNX << "_f[" << fShapeW[0] * fShapeW[1] * kernelSize << "] = {0};\n";
358 }
359
360 // vectorize the (dilated)convolution kernels into a matrix
361 // The shape of the kernel is W for 1d image, H x W for 2d image and D x H x W
362 // for 3d image
 // Index positions of the depth/height/width entries inside the attribute vectors,
 // and the element strides of the plain vs. dilated kernel layouts.
363 size_t id = (fDim > 2) ? fDim - 3 : 2;
364 size_t ih = (fDim > 1) ? fDim - 2 : 1;
365 size_t iw = fDim - 1;
366 size_t wstrideDil = fAttrDilations[iw];
367 size_t hstride = kWidth;
368 size_t hstrideDil = fAttrKernelShape[iw];
369 if (fDim > 1)
370 hstrideDil *= fAttrDilations[ih];
371 // stride dilated in the height
372 size_t dstride = kHeight * kWidth;
373 size_t dstrideDil = fAttrKernelShape[iw];
374 if (fDim > 1)
375 dstrideDil *= fAttrKernelShape[ih];
376 if (fDim > 2)
377 dstrideDil *= fAttrDilations[id];
378 size_t icstride = kHeight * kWidth * kDepth;
379 size_t icstrideDil = 1;
380 for (size_t i = 0; i < fDim; i++)
381 icstrideDil *= fAttrKernelShape[i];
382 size_t ocstride = fShapeW[1] * icstride;
383 size_t ocstrideDil = fShapeW[1] * icstrideDil;
384
385 // The shape of f is [M/group, kHeight x kWidth]
 // Generated loops copy W into the dilated kernel matrix tensor_<X>_f.
386 out << SP << "for (std::size_t ic = 0; ic < " << fShapeW[0] << "; ic++) {\n";
387 out << SP << SP << "for (std::size_t oc = 0; oc < " << fShapeW[1] << "; oc++) {\n";
388 // out << SP << SP << SP << "size_t kIndex = 0;\n"; // filter index
389 if (fDim > 2)
390 out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";
391 if (fDim > 1)
392 out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";
393 out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";
394
395 out << SP << SP << SP << SP << SP << "tensor_" << fNX << "_f[ic * " << ocstrideDil << " + oc * " << icstrideDil;
396 if (fDim > 2)
397 out << " + kd * " << dstrideDil;
398 if (fDim > 1)
399 out << " + kh * " << hstrideDil;
400 out << " + kw * " << wstrideDil << " ] = tensor_" << fNW << "[ic * " << ocstride << " + oc * " << icstride;
401
402 if (fDim > 2)
403 out << " + kd * " << dstride;
404 if (fDim > 1)
405 out << " + kh * " << hstride;
406 out << " + kw ];\n";
407
408 // here we rotate the input kernel transforming 0,1,2,...N-1 in N-1,N-2,...,2,1,0
409 // out << " + " << icstride -1 << " - kIndex ];\n"; // transform 1,2,3,4 in 4,3,2,1
410 // out << SP << SP << SP << SP << SP << "kIndex++;\n"; // update input filter index
411
412 out << SP << SP << SP << SP << "}\n";
413 if (fDim > 1)
414 out << SP << SP << SP << "}\n";
415 if (fDim > 2)
416 out << SP << SP << SP << "}\n";
417
418 out << SP << SP << "}\n";
419 out << SP << "}\n";
420
 // GEMM parameters shared by all batch samples: C(m x n) = A(X slice) * B(f)^T.
421 out << SP << "char " << OpName << "_transA = 'N';\n";
422 out << SP << "char " << OpName << "_transB = 'T';\n";
423 out << SP << "int " << OpName << "_m = " << iHeight * iWidth * iDepth << ";\n";
424 out << SP << "int " << OpName << "_n = " << icstrideDil * fShapeW[1] << ";\n"; // output channels * filters
425 out << SP << "int " << OpName << "_k = " << fShapeW[0] << ";\n"; // input channels
426 out << SP << "float " << OpName << "_alpha = 1.0;\n";
427 out << SP << "float " << OpName << "_beta = 0.0;\n";
428
429 if (!fUseSession) {
430 out << SP << fType << " tensor_" << fNX << "_xcol[" << fShapeW[0] * icstrideDil * oDepth * oHeight * oWidth
431 << "] = {0};\n";
432 }
433
434 // Loop on batch size
435 out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";
436
437 // IM2COL: Unroll the input tensor
438 // order input data as (e.g. kernel 2x2) and (xa,ya) is channel 1 and (xb,yb) is channel 2
439 // (xa1,..,xak,ya1,..yak)(xb1,...,xbk,yb1,..,ybk)
440 // (xa2,...xak+1,ya1,...yak)(......)
441 // trick for speed is using caffe im2col and output a matrix which contains filtered values as rows.
442 // By doing this one has consecutive memory reads and writes
443 // Resulting matrix op_xcol is (output channels * filter_h * filter_w , output_h * output_w)
 // Asymmetric pads are averaged here (with a warning) because col2im takes a single
 // pad value per axis; note this mutates fAttrPads.
444 if (fDim == 1) {
445 if (fAttrPads[0] != fAttrPads[1]) {
446 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding "
447 << std::endl;
448 fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;
449 }
450 fAttrPads[1] = 0;
451 }
452 if (fDim == 2) {
453 if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {
454 std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. Assume an average padding "
455 << std::endl;
456 fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;
457 fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;
458 }
459 }
460 if (fDim == 3) {
461 if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {
462 std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. Assume an average padding "
463 << std::endl;
464 fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;
465 fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;
466 fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;
467 }
468 }
469
470 if (fAttrGroup == 1) {
471 out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iDepth * iHeight * iWidth << ";\n";
472 out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
473
474 // DO BLAS before:
475 // BLAS
476 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
477 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, "
478 << "tensor_" << fNX << " + x_offset, &" << OpName
479 << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
480 out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_n, &" << OpName << "_beta, tensor_" << fNX
481 << "_xcol, &" << OpName << "_m);\n";
482
483 // when using im2col - resulting matrix is transposed, is (input_c * filter_h * filter_w, output_h *
484 // output_w)
485 // before using col2im I need to transpose matrix
486 if (fDim < 3) {
487 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::col2im<float>(tensor_" << fNX
488 << "_xcol,"
489 // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
490 // dilation_w,
491 << fShapeY[1] << "," << oHeight << "," << oWidth << ",";
492 if (fDim == 1)
493 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
494 << fAttrDilations[0];
495 else // dim ==2
496 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
497 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
498 << fAttrDilations[1];
499 out << ", tensor_" << fNY << " + out_offset);\n\n ";
500 } else {
501 // 3d : needs a col2im for 3d
 // NOTE(review): the code emitted below the throw is unreachable — kept as a
 // sketch for the future 3d implementation.
502 throw std::runtime_error("TMVA SOFIE 3D Conv Transpose not yet supported");
503 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
504 << " + x_offset,"
505 // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
506 // dilation_d, dilation_h, dilation_w,
507 //
508 << fShapeX[1] << "," << oDepth << "," << oHeight << "," << oWidth << "," << fAttrKernelShape[0] << ","
509 << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1] << ","
510 << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
511 << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ",tensor_" << fNX
512 << "_xcol);\n\n ";
513 }
514 // // BLAS
515 // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
516 // << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName
517 // << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
518 // out << SP << SP << SP <<"tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
519 // << " + out_offset, &" << OpName << "_m);\n";
520 } else {
521 // case of group transposed convolution
522 // Unroll (IM2COL) the input tensor- make loop on groups and repeat operations (IM2COL + GEMM for each
523 // group)
524 out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";
525 out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iHeight * iWidth << " + g * "
526 << fShapeX[1] * iHeight * iWidth / fAttrGroup << ";\n ";
527 out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oHeight * oWidth << " + g * "
528 << fShapeY[1] * oHeight * oWidth / fAttrGroup << ";\n ";
529
530 // do BLAS here (LM: probably need an offset for op_f the kernels)
531 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
532 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, "
533 << "tensor_" << fNX << " + x_offset, &" << OpName
534 << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
535 out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_n, &" << OpName << "_beta, tensor_" << fNX
536 << "_xcol , &" << OpName << "_m);\n";
537
538 if (fDim < 3) {
539 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::col2im<float>(tensor_" << fNX
540 << "_xcol,"
541 // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
542 // dilation_w,
543 << fShapeY[1] << "," << oHeight << "," << oWidth << ",";
544 if (fDim == 1)
545 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
546 << fAttrDilations[0];
547 else // dim ==2
548 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
549 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
550 << fAttrDilations[1];
551 out << ", tensor_" << fNY << " + out_offset);\n\n ";
552 } else {
553 // 3d im2col
 // NOTE(review): as in the group==1 branch, the emission below the throw is
 // unreachable — kept as a sketch for the future 3d implementation.
554 throw std::runtime_error("TMVA SOFIE 3D Conv Transpose not yet supported");
555
556 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
557 << " + x_offset,"
558 // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
559 // dilation_d, dilation_h, dilation_w,
560 //
561 << fShapeX[1] << "," << oDepth << "," << oHeight << "," << oWidth << "," << fAttrKernelShape[0] << ","
562 << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1] << ","
563 << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
564 << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << "," << "tensor_" << fNX
565 << "_xcol);\n\n ";
566 }
567
568 // // BLAS
569 // // offset g must be g * k * n
570 // out << SP << SP << SP << "size_t offset_f = g * " << fShapeW[0] * fShapeW[1] * icstrideDil / fAttrGroup <<
571 // ";\n"; out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName <<
572 // "_m, &"
573 // << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName
574 // << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
575 // out << SP << SP << SP << "tensor_" << fNX << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta,
576 // tensor_" << fNY
577 // << " + out_offset"
578 // << ", &" << OpName << "_m);\n";
579
580 out << SP << SP << "}\n"; // end of group loop
581 }
582
583 out << SP << "}\n"; // end of batch size loop
584
 // Add the broadcasted bias to the whole output with a single saxpy (y += 1.0 * b).
585 if (fNBroadcastedB != "") {
586 out << SP << "int " << OpName << "_size = " << fShapeY[0] * fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
587 out << SP << "float " << OpName << "_gamma = 1.0;\n";
588 out << SP << "int " << OpName << "_incx = 1;\n";
589 out << SP << "int " << OpName << "_incy = 1;\n";
590
591 out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNBroadcastedB << ", &"
592 << OpName << "_incx, tensor_" << fNY << ", &" << OpName << "_incy);\n";
593 }
594
595 return out.str();
596}
597
598} // namespace TMVA::Experimental::SOFIE
599
600#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
std::vector< size_t > GetTensorShape(const std::string &name) const
Definition RModel.cxx:29
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< Dim > dim_shape)
Definition RModel.cxx:262
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:122
std::shared_ptr< void > GetInitializedTensorData(std::string tensor_name)
Definition RModel.cxx:327
ETensorType GetTensorType(std::string name) const
Definition RModel.cxx:90
void UpdateInitializedTensor(std::string tensor_name, ETensorType type, std::vector< std::size_t > shape, std::shared_ptr< void > data)
Definition RModel.cxx:318
std::vector< std::string > GetBlasRoutines() override
Returns the blas routines needed to compile the generated code.
ROperator_ConvTranspose(std::string autopad, std::vector< size_t > dilations, size_t group, std::vector< size_t > kernelShape, std::vector< size_t > outputPadding, std::vector< size_t > outputShape, std::vector< size_t > pads, std::vector< size_t > strides, std::string nameX, std::string nameW, std::string nameB, std::string nameY)
Constructor of ROperator_ConvTranspose from the attributes.
void Initialize(RModel &) override
Initialize the model.
ROperator_ConvTranspose()
Default constructor of ROperator_ConvTranspose.
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
Infers the type of the output tensor.
std::string GenerateInitCode() override
Generate code for initializing the op.
std::string Generate(std::string opName) override
Generate the inference code.
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > >) override
Infers the shape of the input tensors.
std::vector< std::string_view > fInputTensorNames
Definition ROperator.hxx:49
std::vector< std::string_view > fOutputTensorNames
Definition ROperator.hxx:50
std::size_t ConvertShapeToLength(const std::vector< size_t > &shape)
ETensorType ConvertStringToType(std::string type)
std::string ConvertShapeToString(const std::vector< size_t > &shape)