ROperator_Conv.hxx
#ifndef TMVA_SOFIE_ROPERATOR_CONV
#define TMVA_SOFIE_ROPERATOR_CONV

#include "TMVA/ROperator.hxx"
#include "TMVA/RModel.hxx"

#include <memory>
#include <sstream>
#include <algorithm>
#include <stdexcept>
#include <vector>
#include <cassert>

namespace TMVA {
namespace Experimental {
namespace SOFIE {
template<typename T>
class ROperator_Conv final : public ROperator
{
private:
   bool fBroadcastBias = false;

   std::string fAttrAutopad;
   std::vector<size_t> fAttrDilations;
   size_t fAttrGroup;
   std::vector<size_t> fAttrKernelShape;
   std::vector<size_t> fAttrPads;
   std::vector<size_t> fAttrStrides;

   std::string fNX;   // input tensor name
   std::string fNW;   // weight tensor name
   std::string fNB;   // bias tensor name (may be empty)
   std::string fNY;   // output tensor name

   std::string convK;   // name of the helper tensor holding the flattened (dilated) kernels
   std::string imcol;   // name of the helper tensor holding the im2col matrix

   std::vector<Dim> fShapeX;
   std::vector<size_t> fShapeW;
   std::vector<size_t> fShapeB;
   std::vector<Dim> fShapeY;

   std::string fType;

   size_t fDim; // dimension of the convolution (1, 2 or 3)

public:

   ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
                  size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
                  std::vector<size_t> strides, std::string nameX, std::string nameW,
                  std::string nameB, std::string nameY):
      fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),
      fAttrPads(pads), fAttrStrides(strides),
      fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)),
      fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY))
   {
      if(std::is_same<T, float>::value) {
         fType = "float";
      } else {
         throw
            std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
      }
   }
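
   // A minimal, hypothetical usage sketch (the operator is normally constructed
   // by the ONNX parser; the tensor names "x", "w", "b", "y" are illustrative):
   // a 2D convolution with 3x3 kernels, stride 1 and symmetric padding 1 is
   //
   //   auto op = std::make_unique<ROperator_Conv<float>>(
   //       "NOTSET",                           // autopad
   //       std::vector<size_t>{1, 1},          // dilations
   //       1,                                  // group
   //       std::vector<size_t>{3, 3},          // kernel shape
   //       std::vector<size_t>{1, 1, 1, 1},    // pads (h_begin, w_begin, h_end, w_end)
   //       std::vector<size_t>{1, 1},          // strides
   //       "x", "w", "b", "y");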

   ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
                  size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
                  std::vector<size_t> strides, std::string nameX, std::string nameW,
                  std::string nameY):
      fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),
      fAttrPads(pads), fAttrStrides(strides),
      fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNY(UTILITY::Clean_name(nameY))
   {
      if(std::is_same<T, float>::value) {
         fType = "float";
      } else {
         throw
            std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
      }
   }

   std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {
      ETensorType out = input[0];
      return {out};
   }

   // function returning the output shape given the input
   std::vector<Dim> DoShapeInference(const std::vector<Dim> & input, const std::vector<size_t> & weight) {
      // shape of the convolution input has to be (according to ONNX): N x C x H x W
      // where N : batch size, C : input channels, H : input height, W : input width

      if (input.size() - 2 != fDim) {
         throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid input ");
      }
      if (weight.size() - 2 != fDim) {
         throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid weights ");
      }
      if (fAttrGroup == 0 && input[1].isParam)
         throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without group attr");
      if (fAttrKernelShape.empty()) {
         if (input[2].isParam || (fDim > 1 && input[3].isParam) || (fDim > 2 && input[4].isParam))
            throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without kernel attr");
      }

      // a group attribute of zero means it was not given: infer it from the channel counts
      if (fAttrGroup == 0) {
         fAttrGroup = input[1].dim / weight[1];
      }

      // kernel shape
      size_t k1 = ((fAttrKernelShape.empty()) ? weight[2] : fAttrKernelShape[0]);
      size_t k2 = (fDim > 1) ? ((fAttrKernelShape.empty()) ? weight[3] : fAttrKernelShape[1]) : 1;
      size_t k3 = (fDim > 2) ? ((fAttrKernelShape.empty()) ? weight[4] : fAttrKernelShape[2]) : 1;

      // indices of the "end" pads in fAttrPads for each spatial axis
      size_t i1 = (fDim > 1) ? ((fDim > 2) ? 3 : 2) : 1;
      size_t i2 = (fDim > 2) ? 4 : 3;
      size_t i3 = 5;

      if (fAttrDilations.empty()) {
         fAttrDilations = {1, 1, 1};
      }
      // pad the dilations with 1 up to size 3 (for 1d and 2d convolutions)
      fAttrDilations.resize(3, 1);
      // shape of the kernel, dilated to its effective extent: k_eff = k + (d-1)*(k-1)
      fAttrKernelShape = {k1 + (fAttrDilations[0] - 1) * (k1 - 1),
                          k2 + (fAttrDilations[1] - 1) * (k2 - 1),
                          k3 + (fAttrDilations[2] - 1) * (k3 - 1)};
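      // Worked example of the effective kernel extent: a 3x3 kernel with
      // dilation 2 covers k_eff = 3 + (2-1)*(3-1) = 5 input positions per axis,
      // so it behaves like a sparse 5x5 kernel for the output-shape computation.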

      if (fAttrStrides.empty()) {
         fAttrStrides = {1, 1, 1};
      }
      if (fDim < 3)
         fAttrStrides.resize(3, 1);

      if (fAttrAutopad == "NOTSET") {
         if (fAttrPads.empty()) {
            fAttrPads = {1, 1, 1, 1, 1, 1};
         }
      } else if (fAttrAutopad == "SAME_UPPER" || fAttrAutopad == "SAME_LOWER") {
         for (size_t d = 0; d < fDim; ++d) {
            if (input[d + 2].isParam)
               throw std::runtime_error(
                  "TMVA SOFIE Conv Op: SAME padding with parametric input shape is not supported");
         }
         // ONNX SAME padding: total_pad = max(0, (ceil(in/stride)-1)*stride + kernel - in)
         // SAME_UPPER places the extra padding at the end, SAME_LOWER at the beginning
         fAttrPads.assign(6, 0);
         for (size_t d = 0; d < fDim; ++d) {
            size_t inSize = input[d + 2].dim;
            size_t stride_d = fAttrStrides[d];
            size_t outSize = (inSize + stride_d - 1) / stride_d;  // ceil(in/stride)
            int totalPad = std::max(0, (int)((outSize - 1) * stride_d + fAttrKernelShape[d]) - (int)inSize);
            if (fAttrAutopad == "SAME_UPPER") {
               fAttrPads[d] = (size_t)(totalPad / 2);
               fAttrPads[d + fDim] = (size_t)(totalPad - totalPad / 2);
            } else {
               fAttrPads[d] = (size_t)(totalPad - totalPad / 2);
               fAttrPads[d + fDim] = (size_t)(totalPad / 2);
            }
         }
      } else if (fAttrAutopad != "VALID") {
         throw
            std::runtime_error("TMVA SOFIE Conv Op invalid fAutopad");
      }
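      // Worked example of the SAME formula: in = 5, stride = 2, kernel = 3
      // gives out = ceil(5/2) = 3 and total_pad = max(0, (3-1)*2 + 3 - 5) = 2,
      // so SAME_UPPER yields pads (1, 1) here, and (1, 2) when total_pad = 3.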
      // make sure pads is a vector of size 6
      if (fDim < 3) fAttrPads.resize(6, 0);

      Dim input1 = input[2];
      Dim input2 = (fDim > 1) ? input[3] : Dim{1};
      Dim input3 = (fDim > 2) ? input[4] : Dim{1};

      size_t pad1 = fAttrPads[0] + fAttrPads[i1];  // total padding on the first spatial axis

      // function returning the output dimension of the convolution given one input dimension
      auto computeOutput = [&](Dim inputDim, size_t kernel, size_t pad, size_t stride) {
         if (!inputDim.isParam) {
            size_t outSize = (inputDim.dim + pad - kernel) / stride + 1;
            return Dim{outSize};
         } else {
            if (stride == 1) {
               if ((int64_t)pad - (int64_t)kernel + 1 == 0)
                  // output is same as input
                  return inputDim;
               else {
                  int64_t v = (int64_t)pad - (int64_t)kernel + 1;
                  std::string outStr = "(" + inputDim.param + "+" + std::to_string(v) + ")";
                  return Dim{ outStr, static_cast<size_t>(-1)};
               }
            } else { // general case (stride not 1)
               int64_t v = (int64_t)pad - (int64_t)kernel;
               std::string outStr = "((" + inputDim.param + "+" + std::to_string(v) + ")/"
                                    + std::to_string(stride) + "+1)";
               return Dim{ outStr, static_cast<size_t>(-1)};
            }
         }
         // not reached: all branches above return
         throw std::runtime_error("TMVA SOFIE Conv Op - invalid values");
         return Dim{};
      };
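      // Worked example: in = 32, effective kernel = 3, total pad = 2, stride = 1
      // gives out = (32 + 2 - 3)/1 + 1 = 32 (a shape-preserving "same" convolution);
      // with stride 2 the same inputs give out = (32 + 2 - 3)/2 + 1 = 16.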

      Dim output1 = computeOutput(input1, fAttrKernelShape[0], pad1, fAttrStrides[0]);

      Dim batch_size = input[0]; // first element in input tensor
      Dim output_channels = Dim{weight[0]}; // first element in weight tensor

      std::vector<Dim> ret({ batch_size, output_channels, output1 });

      if (fDim == 1)
         return ret;

      size_t pad2 = fAttrPads[1] + fAttrPads[i2];
      Dim output2 = computeOutput(input2, fAttrKernelShape[1], pad2, fAttrStrides[1]);

      // output is N x M x OH x OW
      ret.push_back(output2);
      if (fDim == 2)
         return ret;

      size_t pad3 = fAttrPads[2] + fAttrPads[i3];
      Dim output3 = computeOutput(input3, fAttrKernelShape[2], pad3, fAttrStrides[2]);

      // output is N x M x OD x OH x OW
      ret.push_back(output3);
      return ret;
   }
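
   // End-to-end example of the shape inference above: input X of shape
   // (N, C, H, W) = (1, 3, 32, 32), weights of shape (M, C, kH, kW) = (16, 3, 3, 3),
   // pads {1, 1, 1, 1}, strides {1, 1} and dilations {1, 1} give an output shape
   // of (1, 16, 32, 32); with strides {2, 2} the output becomes (1, 16, 16, 16).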

   void Initialize(RModel& model) override {
      fUseSession = model.UseSession();
      if (!model.CheckIfTensorAlreadyExist(fNX)) {
         throw
            std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model");
      }
      fShapeX = model.GetDimTensorShape(fNX);
      if (fShapeX.size() < 3 || fShapeX.size() > 5) {
         std::cout << fNX << " : " << ConvertDimShapeToString(fShapeX) << std::endl;
         throw
            std::runtime_error("TMVA SOFIE Conv Op input data tensor " + fNX + " is not of 3, 4 or 5 dimensions");
      }
      fDim = fShapeX.size() - 2;
      if (!model.CheckIfTensorAlreadyExist(fNW)) {
         throw
            std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model");
      }
      fShapeW = model.GetTensorShape(fNW);
      if (fShapeW.size() < 3 || fShapeW.size() > 5) {
         std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;
         throw std::runtime_error("TMVA SOFIE Conv Op input weight tensor " + fNW + " is not of 3, 4 or 5 dimensions");
      }
      // infer the output shape and register the output tensor
      fShapeY = DoShapeInference(fShapeX, fShapeW);
      model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
      if (fNB != "") {
         if (!model.CheckIfTensorAlreadyExist(fNB)) {
            throw
               std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model");
         }
         fShapeB = model.GetTensorShape(fNB);
         if (fShapeB.size() != 1)
            throw
               std::runtime_error("TMVA SOFIE Conv op : invalid shape for Bias tensor (is not 1D)");
         std::vector<Dim> targetShape(fShapeY.begin() + 1, fShapeY.end());
         auto shapeDimB = model.GetDimTensorShape(fNB);
         bool broadcast_needed = !UTILITY::AreSameShape(shapeDimB, targetShape);
         if (broadcast_needed) {
            auto original_data = model.GetInitializedTensorData(fNB);
            // make the bias shape equal to the Y shape by adding 1's
            if (fShapeB.size() < 1)
               throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape");
            // we assume the bias tensor dimension is equal to the number of filters, that is the second
            // dimension in the output tensor
            if (!(shapeDimB[0] == fShapeY[1]))
               throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " +
                                        ConvertDimShapeToString(shapeDimB));
            if (fType != "float")
               throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported");
            // here is the actual broadcasting
            fBroadcastBias = true;
            if (!fUseSession) {
               // do the broadcasting here
               std::vector<size_t> shape(fDim + 1, 1);
               shape[0] = fShapeB[0];
               auto intTargetShape = ConvertShapeToInt(targetShape);
               std::shared_ptr<void> new_data_ptr(
                  UTILITY::UnidirectionalBroadcast(static_cast<float *>(original_data.get()), shape, intTargetShape),
                  std::default_delete<float[]>());
               model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), intTargetShape, new_data_ptr);
               fShapeB = model.GetTensorShape(fNB);
            }
         }
      }
      // the output channel size can be parametric and is then an expression
      std::vector<Dim> outputDims = std::vector<Dim>(fShapeY.begin() + 2, fShapeY.end());
      // check if the output shape is parametric
      std::vector<size_t> outputInts = ConvertShapeToInt(outputDims);
      Dim channelDim;
      if (outputInts.empty()) {
         auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W
         channelDim = Dim{ outputChannelSize, static_cast<size_t>(-1)};
      } else {
         channelDim = Dim{ ConvertShapeToLength(outputInts) };
      }
      size_t kernelSize = fAttrKernelShape[0];
      for (size_t i = 1; i < fDim; i++) {
         kernelSize *= fAttrKernelShape[i];
      }

      // register the two helper tensors used by the generated code:
      // the flattened kernel matrix and the im2col matrix
      std::vector<size_t> shape1 = {fShapeW[0], fShapeW[1], kernelSize};
      std::vector<Dim> shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, channelDim };
      model.AddIntermediateTensor(fNX + "_f", model.GetTensorType(fNX), shape1);
      model.AddIntermediateTensor(fNX + "_xcol", model.GetTensorType(fNX), shape2);
      convK = fNX + "_f";
      imcol = fNX + "_xcol";
      fOutputTensorNames.emplace_back(convK);
      fOutputTensorNames.emplace_back(imcol);
      fInputTensorNames.emplace_back(convK);
      fInputTensorNames.emplace_back(imcol);

      if (model.Verbose()) {
         std::cout << "Conv - " << fDim << " " << fNX << " : " << ConvertDimShapeToString(fShapeX)
                   << " --> " << fNY << " : " << ConvertDimShapeToString(fShapeY) << std::endl;
      }
   }

   std::string GenerateInitCode() override {
      std::stringstream out;
      // generate the initialization code for broadcasting of the bias tensor
      if (fBroadcastBias) {
         // use a separate scope to avoid defining unique operator temp variables
         std::vector<size_t> shape(fDim + 1, 1);
         // bias (is a 1D tensor)
         shape[0] = fShapeB[0];
         std::vector<Dim> targetShape(fShapeY.begin() + 1, fShapeY.end());
         out << "//--- broadcast bias tensor " << fNB << " for Conv op if needed \n";
         // in case of dynamic tensors the check needs to be done at run time
         auto length = ConvertDimShapeToLength(targetShape); // output length per batch (an expression if dynamic)
         bool isOutDynamic = ConvertShapeToInt(targetShape).empty();
         if (isOutDynamic)
            out << SP << "if (" << length << " > " << ConvertShapeToLength(shape) << ") {\n";
         else
            out << SP << "{\n";
         out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_"
             << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertDimShapeToString(fShapeY) << ");\n";
         out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n";
         out << SP << SP << "std::copy(data, data + " << length << ", fTensor_" << fNB << ".begin());\n";
         out << SP << SP << "tensor_" << fNB << " = fTensor_" << fNB << ".data();\n";
         out << SP << SP << "delete[] data;\n";
         out << SP << "}\n";
      }
      return out.str();
   }
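
   // For illustration only: for a bias of 16 channels broadcast to a fixed
   // (16, 32, 32) per-batch output, the code emitted above would look roughly
   // like this (indicative, not verbatim):
   //
   //   //--- broadcast bias tensor b for Conv op if needed
   //   {
   //      float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_b, ...);
   //      fTensor_b.resize(16384);                      // 16 * 32 * 32
   //      std::copy(data, data + 16384, fTensor_b.begin());
   //      tensor_b = fTensor_b.data();
   //      delete[] data;
   //   }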

   std::string Generate(std::string OpName) override {
      OpName = "op_" + OpName;

      if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {
         throw
            std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first");
      }

      std::stringstream out;
      auto bsize = fShapeX[0];
      size_t kDepth = (fDim > 2) ? fShapeW[2] : 1;        // kernel depth
      size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1;    // kernel height
      size_t kWidth = fShapeW[fDim + 1];                  // kernel width
      auto iDepth = (fDim > 2) ? fShapeX[2] : Dim{1};     // input depth
      auto iHeight = (fDim > 1) ? fShapeX[fDim] : Dim{1}; // input height
      auto iWidth = fShapeX[fDim + 1];                    // input width
      auto oDepth = (fDim > 2) ? fShapeY[2] : Dim{1};     // output depth
      auto oHeight = (fDim > 1) ? fShapeY[fDim] : Dim{1}; // output height
      auto oWidth = fShapeY[fDim + 1];                    // output width
      // total output size for a channel
      auto outputChannelStride = ConvertDimShapeToLength(std::vector<Dim>{oDepth, oHeight, oWidth}); // size of channel = D * H * W
      auto outputBatchStride = ConvertDimShapeToLength(std::vector<Dim>{fShapeY[1], oDepth, oHeight, oWidth}); // size of C * D * H * W
      // input sizes
      auto inputChannelStride = ConvertDimShapeToLength(std::vector<Dim>{iDepth, iHeight, iWidth}); // size of channel = D * H * W
      auto inputBatchStride = ConvertDimShapeToLength(std::vector<Dim>{fShapeX[1], iDepth, iHeight, iWidth}); // size of C * D * H * W

      out << "\n//---- operator Conv " << OpName << "\n";

      // vectorize the (dilated) convolution kernels into a matrix;
      // no need to transpose the matrix
      // (to be fixed for the 1d and 3d cases)

      size_t id = (fDim > 2) ? fDim - 3 : 2;
      size_t ih = (fDim > 1) ? fDim - 2 : 1;
      size_t iw = fDim - 1;

      size_t wstrideDil = fAttrDilations[iw];
      size_t hstride = kWidth;
      size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw]; // stride dilated in the height
      size_t dstride = kHeight * kWidth;
      size_t dstrideDil = fAttrDilations[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw]; // stride dilated in the depth
      size_t icstride = kHeight * kWidth * kDepth;
      size_t icstrideDil = fAttrKernelShape[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw];
      size_t ocstride = fShapeW[1] * icstride;
      size_t ocstrideDil = fShapeW[1] * icstrideDil;

      out << SP << "for (std::size_t oc = 0; oc < " << fShapeW[0] << "; oc++) {\n";
      out << SP << SP << "for (std::size_t ic = 0; ic < " << fShapeW[1] << "; ic++) {\n";
      if (fDim > 2)
         out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";
      if (fDim > 1)
         out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";
      out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";

      out << SP << SP << SP << SP << SP << "tensor_" << fNX << "_f[oc * "
          << ocstrideDil << " + ic * " << icstrideDil;
      if (fDim > 2) out << " + kd * " << dstrideDil;
      if (fDim > 1) out << " + kh * " << hstrideDil;
      out << " + kw * " << wstrideDil << " ] = tensor_" << fNW << "[oc * " << ocstride << " + ic * " << icstride;
      if (fDim > 2) out << " + kd * " << dstride;
      if (fDim > 1) out << " + kh * " << hstride;
      out << " + kw ];\n";

      out << SP << SP << SP << SP << "}\n";
      if (fDim > 1) out << SP << SP << SP << "}\n";
      if (fDim > 2) out << SP << SP << SP << "}\n";
      out << SP << SP << "}\n";
      out << SP << "}\n";
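      // The loop above copies each kernel weight into its dilated position, so
      // tensor_X_f holds an M x (C/g * k1eff * k2eff * k3eff) row-major matrix of
      // flattened kernels; e.g. 16 filters on 3 input channels with 3x3 kernels
      // and dilation 1 give a 16 x 27 matrix.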

      // out << SP << "char " << OpName << "_transA = 'T';\n";
      out << SP << "char " << OpName << "_transA = 'N';\n";
      out << SP << "char " << OpName << "_transB = 'N';\n";
      out << SP << "int " << OpName << "_m = " << outputChannelStride << ";\n"; // output h*w
      assert(fShapeY[1] == fShapeW[0]);
      // assert(fShapeW[1] == fShapeX[1] / fAttrGroup);
      out << SP << "int " << OpName << "_n = " << fShapeW[0] << ";\n"; // output channels
      out << SP << "int " << OpName << "_k = " << fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] << ";\n";
      out << SP << "float " << OpName << "_alpha = 1.0;\n";
      if (fNB != "")
         out << SP << "float " << OpName << "_beta = 1.0;\n";
      else // when the bias is not present beta needs to be zero to avoid re-using previous results in the output tensor
         out << SP << "float " << OpName << "_beta = 0.0;\n";
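      // Dimension bookkeeping for the GEMM below: per batch entry it computes
      // Y (n x m) = F (n x k) * Xcol (k x m), with n = output channels,
      // k = C/g * kernel volume and m = OH*OW (times OD in 3d). For 16 filters
      // on 3 input channels with 3x3 kernels and a 32x32 output:
      // n = 16, k = 27, m = 1024.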

      // loop on the batch size
      out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";

      // IM2COL: unroll the input tensor
      // order input data as (e.g. kernel 2x2) and (xa,ya) is channel 1 and (xb,yb) is channel 2
      // (xa1,..,xak,ya1,..yak)(xb1,...,xbk,yb1,..,ybk)
      // (xa2,...xak+1,ya1,...yak)(......)
      // the trick for speed is to use the caffe im2col and output a matrix which contains filtered values as rows.
      // By doing this one has consecutive memory reads and writes.
      // The resulting matrix op_xcol is (input channels * filter_h * filter_w, output_h * output_w)
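      // Small im2col example: a single-channel 3x3 input convolved with a 2x2
      // kernel, no padding, stride 1, has a 2x2 output, so xcol is a
      // (1*2*2) x (2*2) = 4 x 4 matrix; column j holds the input patch that
      // produces output pixel j, and the GEMM reduces each column against the
      // flattened kernels.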
      if (fDim == 1) {
         if (fAttrPads[0] != fAttrPads[1]) {
            std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assuming an average padding "
                      << std::endl;
            fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;
         }
         fAttrPads[1] = 0;
         fAttrStrides[1] = 1;
      }
      if (fDim == 2) {
         if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {
            std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assuming an average padding " << std::endl;
            fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;
            fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;
         }
      }
      if (fDim == 3) {
         if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {
            std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assuming an average padding " << std::endl;
            fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;
            fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;
            fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;
         }
      }
      out << SP << SP << "size_t out_offset = n * " << outputBatchStride << ";\n";

      if (fAttrGroup == 1) {
         out << SP << SP << "size_t x_offset = n * " << inputBatchStride << ";\n";
         // when using im2col the resulting matrix is transposed: its dimension is
         // (input_c * filter_h * filter_w, output_h * output_w)
         if (fDim < 3) {
            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
                << " + x_offset,"
                // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
                // dilation_w,
                << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
            if (fDim == 1)
               out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
                   << fAttrDilations[0];
            else // fDim == 2
               out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
                   << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
                   << fAttrDilations[1];
            out << "," << "tensor_" << fNX << "_xcol);\n\n ";
         } else {
            // 3d im2col
            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
                << " + x_offset,"
                // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
                // dilation_d, dilation_h, dilation_w,
                << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << ","
                << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << ","
                << fAttrPads[0] << "," << fAttrPads[1] << "," << fAttrPads[2] << ","
                << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
                << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ","
                << "tensor_" << fNX << "_xcol);\n\n ";
         }
         // BLAS
         out << SP << "TMVA::Experimental::SOFIE::Gemm_Call("
             << "tensor_" << fNY << " + out_offset, false, false, "
             << OpName << "_m, " << OpName << "_n, " << OpName << "_k, "
             << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, tensor_" << fNX << "_f, "
             << OpName << "_beta, ";
         if (fNB != "")
            out << "tensor_" << fNB;
         else
            out << "nullptr";
         out << ");\n";

         // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
         //    << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, &" << OpName
         //    << "_m,\n"; // use m if op_xcol is not transposed, otherwise k
         // out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
         //    << " + out_offset, &" << OpName << "_m);\n";
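         // Note on the layout trick used by the (commented) sgemm call: BLAS is
         // column-major, so computing Xcol^T (m x k) * F^T (k x n) column-major
         // with leading dimension m produces Y^T, whose memory read row-major
         // is exactly Y (n x m) = F * Xcol.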
      } else {
         // case of group convolution:
         // unroll (im2col) the input tensor, then loop on the groups and repeat
         // the operations (im2col + GEMM) for each group
         // out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
         out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";
         out << SP << SP << "size_t x_offset = n * " << inputBatchStride << " + g * "
             << fShapeW[1] << " * " << inputChannelStride << ";\n ";
         out << SP << SP << "size_t g_offset = g * " << fShapeW[0] << " * (" << outputChannelStride << ") / " << fAttrGroup << ";\n ";
         out << SP << SP << "size_t out_offset = n * " << outputBatchStride << " + g_offset;\n";

         if (fDim < 3) {
            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
                << " + x_offset,"
                // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
                // dilation_w,
                << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
            if (fDim == 1)
               out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
                   << fAttrDilations[0];
            else // fDim == 2
               out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
                   << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
                   << fAttrDilations[1];
            out << ", tensor_" << fNX << "_xcol);\n\n ";
         } else {
            // 3d im2col
            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
                << " + x_offset,"
                // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
                // dilation_d, dilation_h, dilation_w,
                << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << "," << fAttrKernelShape[0] << ","
                << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1]
                << "," << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2]
                << "," << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ",tensor_" << fNX
                << "_xcol);\n\n ";
         }

         // BLAS
         // n must be divided by the number of groups
         out << SP << SP << SP << OpName << "_n = " << fShapeW[0] / fAttrGroup << ";\n";
         // the offset into the flattened kernels for group g must be g * k * n
         out << SP << SP << SP << "size_t offset_f = g * "
             << fShapeW[0] * fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] / fAttrGroup
             << ";\n";

         out << SP << "TMVA::Experimental::SOFIE::Gemm_Call("
             << "tensor_" << fNY << " + out_offset, false, false, "
             << OpName << "_m, " << OpName << "_n, " << OpName << "_k, "
             << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, tensor_" << fNX << "_f + offset_f, "
             << OpName << "_beta, ";
         if (fNB != "")
            out << "tensor_" << fNB << " + g_offset";
         else
            out << "nullptr";
         out << ");\n";

         // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
         //    << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName
         //    << "_m,\n"; // use m if op_xcol is not transposed, otherwise k
         // out << SP << SP << SP << "tensor_" << fNX << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
         //    << " + out_offset"
         //    << ", &" << OpName << "_m);\n";

         out << SP << SP << "}\n"; // end of group loop
      }

      // if (fNB != "") {
      //    out << SP << "int " << OpName << "_size = " << outputBatchStride << ";\n";
      //    out << SP << "float " << OpName << "_gamma = 1.0;\n";
      //    out << SP << "int " << OpName << "_incx = 1;\n";
      //    out << SP << "int " << OpName << "_incy = 1;\n";

      //    out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB << ", &"
      //       << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n";

      // }
      out << SP << "}\n"; // end of batch size loop

      return out.str();
   }
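
   // For orientation, the code emitted by Generate for a 2D convolution with
   // group == 1 has roughly this structure (indicative only):
   //
   //   //---- operator Conv op_0
   //   for (std::size_t oc = 0; ...) { ... }   // flatten kernels into tensor_x_f
   //   char op_0_transA = 'N'; char op_0_transB = 'N';
   //   int op_0_m = OH*OW; int op_0_n = M; int op_0_k = C*kH*kW;
   //   for (size_t n = 0; n < N; n++) {
   //      size_t out_offset = n * M*OH*OW;
   //      size_t x_offset = n * C*H*W;
   //      TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_x + x_offset, ..., tensor_x_xcol);
   //      TMVA::Experimental::SOFIE::Gemm_Call(tensor_y + out_offset, ..., tensor_b);
   //   }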

   /*! \brief Returns the blas routines needed to compile the generated code
    */
   std::vector<std::string> GetBlasRoutines() override { return { std::string("Gemm"), std::string("Axpy") }; }
};

} // namespace SOFIE
} // namespace Experimental
} // namespace TMVA

#endif