Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_Conv.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_CONV
2#define TMVA_SOFIE_ROPERATOR_CONV
3
5#include "TMVA/ROperator.hxx"
6#include "TMVA/RModel.hxx"
7
8#include <memory>
9#include <sstream>
10#include <algorithm>
11#include <stdexcept>
12#include <vector>
13#include <cassert>
14
15namespace TMVA {
16namespace Experimental {
17namespace SOFIE {
18
template<typename T>
// NOTE(review): the class declaration line itself (original line 20, presumably
// `class ROperator_Conv final : public ROperator`) was dropped by the doc
// extraction -- confirm against the original ROOT header.
{
private:
   // ONNX Conv attributes (normalized in DoShapeInference):
   std::string fAttrAutopad;               // auto_pad: "NOTSET", "SAME_UPPER", "SAME_LOWER" or "VALID" (see DoShapeInference)
   std::vector<size_t> fAttrDilations;     // per-spatial-axis dilations; defaulted to 1 and resized to length 3
   size_t fAttrGroup;                      // group count; 0 means "derive from input channels / weight shape"
   std::vector<size_t> fAttrKernelShape;   // kernel spatial dims; rewritten to the dilated sizes in DoShapeInference
   std::vector<size_t> fAttrPads;          // pads (begin.../end... per axis); resized to length 6
   std::vector<size_t> fAttrStrides;       // per-spatial-axis strides; resized to length 3

   // Sanitized tensor names (cleaned via UTILITY::Clean_name in the constructors):
   std::string fNX;                        // input tensor X
   std::string fNW;                        // weight (kernel) tensor W
   std::string fNB;                        // optional bias tensor B ("" if absent)
   std::string fNB2; // bias tensor name after broadcasting
   std::string fNY;                        // output tensor Y

   // Names of the helper tensors created in Initialize:
   std::string convK;                      // fNX + "_f"    : unrolled (dilated) kernel matrix
   std::string imcol;                      // fNX + "_xcol" : im2col buffer

   std::vector<Dim> fShapeX;               // input shape  (Dim-based: entries may be parametric)
   std::vector<size_t> fShapeW;            // weight shape (always integer)
   std::vector<size_t> fShapeB;            // bias shape   (always integer)
   std::vector<Dim> fShapeY;               // output shape (Dim-based: entries may be parametric)

   std::string fType;                      // element type as a string; only "float" is supported

   size_t fDim; // dimension of the convolution (1, 2 or 3 == fShapeX.size() - 2)


public:
50
52
   // Constructor for a Conv node with a bias tensor (X, W, B -> Y).
   // Tensor names are sanitized with UTILITY::Clean_name so they can be embedded
   // as identifiers in the generated C++ code.
   // NOTE(review): the first line of the member-initializer list (original line 57,
   // presumably fAttrAutopad/fAttrDilations/fAttrGroup/fAttrKernelShape) and the
   // trailing statements of the body (original lines 68-69) were dropped by the
   // extraction -- compare against the original ROOT header.
   ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
      size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
      std::vector<size_t> strides, std::string nameX, std::string nameW,
      std::string nameB, std::string nameY):
      fAttrPads(pads), fAttrStrides(strides),
      fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)),
      fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY))
   {
      // only float is implemented; reject any other template type at parse time
      if(std::is_same<T, float>::value) {
         fType = "float";
      } else {
         throw
            std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
      }
   }
71
   // Constructor for a Conv node without a bias tensor (X, W -> Y).
   // NOTE(review): the first line of the member-initializer list (original line 76)
   // and the trailing statements of the body (original lines 86-87) were dropped
   // by the extraction -- compare against the original ROOT header.
   ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
      size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
      std::vector<size_t> strides, std::string nameX, std::string nameW,
      std::string nameY):
      fAttrPads(pads), fAttrStrides(strides),
      fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNY(UTILITY::Clean_name(nameY))
   {
      // only float is implemented; reject any other template type at parse time
      if(std::is_same<T, float>::value) {
         fType = "float";
      } else {
         throw
            std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
      }
   }
89
90 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {
91 ETensorType out = input[0];
92 return {out};
93 }
94
95 // function returning output shape given input
96 std::vector<Dim> DoShapeInference(const std::vector<Dim> & input, const std::vector<size_t> & weight) {
97 // shape of convolution input has to be (according to ONNX): N x C x H x W
98 // Where N : batch size, C : input channels, H : input height, W : input width
99
100 if (input.size() -2 != fDim) {
101 throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid input ");
102 }
103 if (weight.size() -2 != fDim) {
104 throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid weights ");
105 }
106 if (fAttrGroup == 0 && input[1].isParam)
107 throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without group attr");
108 if (fAttrKernelShape.empty()) {
109 if (input[2].isParam || (fDim > 1 && input[3].isParam) || (fDim > 2 && input[4].isParam))
110 throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without kernel attr");
111 }
112
113 if (fAttrGroup == 0) {
114 fAttrGroup = input[1].dim / weight[1];
115 }
116
117 // kernel shape
118 size_t k1 = ((fAttrKernelShape.empty())? weight[2] : fAttrKernelShape[0]);
119 size_t k2 = (fDim > 1) ? ((fAttrKernelShape.empty()) ? weight[3] : fAttrKernelShape[1]) : 1;
120 size_t k3 = (fDim > 2) ? ((fAttrKernelShape.empty()) ? weight[4] : fAttrKernelShape[2]) : 1;
121
122
123 size_t i1 = (fDim > 1) ? ((fDim > 2) ? 3 : 2) : 1;
124 size_t i2 = (fDim > 2) ? 4 : 3;
125 size_t i3 = 5;
126
127 if (fAttrDilations.empty()) {
128 fAttrDilations = {1, 1, 1};
129 }
130 fAttrDilations.resize(3);
131 if (fDim < 3) {
132 fAttrDilations.resize(3, 1);
133 }
134 // Shape of the kernel
135 fAttrKernelShape = {k1 + (fAttrDilations[0] - 1) * (k1 - 1),
136 k2 + (fAttrDilations[1] - 1) * (k2 - 1),
137 k3 + (fAttrDilations[2] - 1) * (k3 - 1)};
138
139 if (fAttrAutopad == "NOTSET") {
140 if (fAttrPads.empty()) {
141 fAttrPads = {1, 1, 1, 1, 1, 1};
142 }
143 } else if (fAttrAutopad == "SAME_UPPER" || fAttrAutopad == "SAME_LOWER") {
144 if (fDim == 1)
146 else if (fDim == 2)
148 else if (fDim == 3)
150 fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[2] / 2};
151 // add extra padding at beginning or end (depending if SAME_UPPER or SAME_LOWER)
152 // need to check this!
153 if (fAttrKernelShape[0] % 2 == 1) {
154 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[0]++ : fAttrPads[i1]++;
155 }
156 if (fDim > 1 && fAttrKernelShape[1] % 2 == 1) {
157 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[1]++ : fAttrPads[i2]++;
158 }
159 if (fDim > 2 && fAttrKernelShape[2] % 2 == 1) {
160 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[2]++ : fAttrPads[i3]++;
161 }
162 } else if (fAttrAutopad != "VALID") {
163 throw
164 std::runtime_error("TMVA SOFIE Conv Op invalid fAutopad");
165 }
166 // to be sure pad is vector of size 6
167 if (fDim < 3) fAttrPads.resize(6, 0);
168
169 if (fAttrStrides.empty()) {
170 fAttrStrides = {1, 1, 1};
171 }
172 if (fDim < 3)
173 fAttrStrides.resize(3, 1);
174
175
176 Dim input1 = input[2];
177 Dim input2 = (fDim > 1) ? input[3] : Dim{1};
178 Dim input3 = (fDim > 2) ? input[4] : Dim{1};
179
180 size_t pad1 = fAttrPads[0] + fAttrPads[i1];
181
182 // function to get output dimension of convolution given input
183
184 auto computeOutput = [&](Dim inputDim, size_t kernel, size_t pad, size_t stride) {
185 if (!inputDim.isParam) {
186 size_t outSize = (inputDim.dim + pad - kernel) / stride + 1;
187 return Dim{outSize};
188 } else {
189 if (stride == 1){
190 if ((pad - kernel + 1) == 0 )
191 // output is same as input
192 return inputDim;
193 else {
194 int64_t v = pad - kernel + 1;
195 std::string outStr = "(" + inputDim.param + "+" + std::to_string(v) + ")";
196 return Dim{ outStr, static_cast<size_t>(-1)};
197 }
198 } else { // general case (stride not 1)
199 int64_t v = pad - kernel;
200 std::string outStr = "((" + inputDim.param + "+" + std::to_string(v) + ")/"
201 + std::to_string(stride) + "1)";
202 return Dim{ outStr, static_cast<size_t>(-1)};
203 }
204 }
205 std::runtime_error("TMVA SOFIE Conv Op - invalid values");
206 return Dim{};
207 };
208
210
211 Dim batch_size = input[0]; // first element in input tensor
212 Dim output_channels = Dim{weight[0]}; // first element in weight tensor
213
214 std::vector<Dim> ret({ batch_size, output_channels, output1 });
215
216 if (fDim == 1)
217 return ret;
218
219 size_t pad2 = fAttrPads[1] + fAttrPads[i2];
221
222 // output is N x M x OH x OW
223 ret.push_back(output2);
224 if (fDim == 2)
225 return ret;
226
227 size_t pad3 = fAttrPads[2] + fAttrPads[i3];
229
230 // output is N x M x OH x OW x OD
231 ret.push_back(output3);
232 return ret;
233 }
234
   // Validates the X/W/B tensors registered in the model, determines fDim, runs
   // shape inference for Y, sets up bias broadcasting (either immediately, or as
   // generated code when a Session is used), and registers the helper tensors
   // (unrolled-kernel "_f" and im2col "_xcol") used by Generate().
   // NOTE(review): several statements were dropped by the doc extraction
   // (marked inline below) -- compare against the original ROOT header.
   void Initialize(RModel& model) override {
      fUseSession = model.UseSession();
      if (!model.CheckIfTensorAlreadyExist(fNX)) {
         throw
            std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model");
      }
      // NOTE(review): the assignment of fShapeX from the model (original line
      // 241, presumably via model.GetDimTensorShape(fNX)) was dropped here.
      if (fShapeX.size() < 3 || fShapeX.size() > 5) {
         std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl;
         throw
            std::runtime_error("TMVA SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions");
      }
      fDim = fShapeX.size() - 2;   // 1D, 2D or 3D convolution
      if (!model.CheckIfTensorAlreadyExist(fNW)) {
         throw
            std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model");
      }
      fShapeW = model.GetTensorShape(fNW);
      if (fShapeW.size() < 3 || fShapeW.size() > 5) {
         std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;
         throw std::runtime_error("TMVA SOFIE Conv Op input weight tensor" + fNW + " is not of 3,4 or 5 dimensions");
      }
      // NOTE(review): the lines computing fShapeY (presumably via
      // DoShapeInference) and registering the output tensor (original lines
      // 257-258) were dropped here.
      if (fNB != "") {
         if (!model.CheckIfTensorAlreadyExist(fNB)) {
            throw
               std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model");
         }
         fShapeB = model.GetTensorShape(fNB);
         // bias must be broadcast to Y without its batch dimension
         std::vector<Dim> targetShape(fShapeY.begin() + 1, fShapeY.end());
         auto shapeDimB = model.GetDimTensorShape(fNB);
         // NOTE(review): the definition of `broadcast_needed` (original line
         // 267) was dropped here.
         if (broadcast_needed) {
            // make bias shape equal to Y shape by adding 1
            if (fShapeB.size() < 1)
               throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape");
            // we assume bias tensor dimension is equal to number of filters that is the second dimension in
            // the output tensor
            if (!(shapeDimB[0] == fShapeY[1]))
               throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " +
            // NOTE(review): the remainder of this error message expression
            // (original line 277) was dropped here.
            if (fType != "float")
               throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported");
            // here is the actual broadcasting
            if (!fUseSession) {
               // no Session: broadcast the initialized bias data right now
               std::vector<size_t> shape(fDim + 1, 1);
               shape[0] = fShapeB[0];
               // NOTE(review): a line was dropped here (original line 284,
               // presumably fetching the original bias data and/or the integer
               // target shape used just below).
               std::shared_ptr<void> new_data_ptr(
                  UTILITY::UnidirectionalBroadcast<float>(static_cast<float *>(original_data.get()), shape, intTargetShape),
                  std::default_delete<float[]>());
               // NOTE(review): a line was dropped here (original line 288,
               // presumably updating the initialized tensor in the model).
               fShapeB = model.GetTensorShape(fNB);
               fNB2 = fNB; // use same name
            }
            else {
               // In case of session add broadcasting code in Session constructor and in GenerateInitCode
               // we need to add a new intermediate tensor for broadcasted bias tensor
               fNB2 = fNB + "bcast";
               // NOTE(review): a line was dropped here (original line 296,
               // presumably registering the broadcast tensor in the model).
            }
         }
      }
      // output channel size can be parametric
      std::vector<Dim> outputDims = std::vector<Dim>(fShapeY.begin()+2, fShapeY.end());
      auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W
      size_t kernelSize = fAttrKernelShape[0];
      for (size_t i = 1; i < fDim; i++) {
         // NOTE(review): the loop body (original line 305, presumably
         // kernelSize *= fAttrKernelShape[i]) was dropped here.
      }

      // shapes of the helper tensors: unrolled kernel and im2col matrix
      std::vector<size_t> shape1 = {fShapeW[0], fShapeW[1], kernelSize};
      std::vector<Dim> shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, Dim{outputChannelSize}};
      // NOTE(review): lines registering the two helper tensors in the model
      // (original lines 310-311) were dropped here.
      convK = fNX +"_f";
      imcol = fNX +"_xcol";
      fOutputTensorNames.emplace_back(convK);
      fOutputTensorNames.emplace_back(imcol);
      fInputTensorNames.emplace_back(convK);
      fInputTensorNames.emplace_back(imcol);

      if (model.Verbose()) {
         std::cout << "Conv - " << fDim << " " << fNX << " : " << ConvertShapeToString(fShapeX)
            << " --> " << fNY << " : " << ConvertShapeToString(fShapeY) << std::endl;
      }
   }
324
325 std::string GenerateInitCode() override {
326 std::stringstream out;
327 // Generate initialization code for broadcasting of bias tensor
328 if (!fNB2.empty()) {
329 // include a separate scope to avoid defining unique operator temp variables
330 std::vector<size_t> shape(fDim + 1, 1);
331 shape[0] = fShapeB[0];
332 std::vector<Dim> targetShape(fShapeY.begin() + 1, fShapeY.end());
333 out << SP << "{\n";
334 out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_"
335 << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n";
336 out << SP << SP << "std::copy(data, data + " << ConvertDimShapeToLength(targetShape) << ", tensor_" << fNB2 << ");\n";
337 out << SP << SP << "delete[] data;\n";
338 out << SP << "}\n";
339 }
340 return out.str();
341 }
342
   // Generates the inference code for this Conv node:
   //   1. unroll the (dilated) kernel weights into the "_f" matrix,
   //   2. per batch element: im2col the input into "_xcol" and multiply with the
   //      unrolled kernel via BLAS sgemm_ (per group when fAttrGroup != 1),
   //   3. add the (already broadcast) bias with BLAS saxpy_ when present.
   // NOTE(review): several definitions were dropped by the doc extraction
   // (marked inline below) -- compare against the original ROOT header.
   std::string Generate(std::string OpName) override {
      OpName = "op_" + OpName;

      if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {
         throw
            std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first");
      }

      std::stringstream out;
      auto bsize = fShapeX[0];
      size_t kDepth = (fDim > 2) ? fShapeW[2] : 1; // kernel depth
      size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; // kernel height
      size_t kWidth = fShapeW[fDim+1]; // kernel width
      auto iDepth = (fDim > 2) ? fShapeX[2] : Dim{1}; // input depth
      auto iHeight = (fDim > 1) ? fShapeX[fDim] : Dim{1}; // input height
      auto iWidth = fShapeX[fDim+1]; // input width
      auto oDepth = (fDim > 2) ? fShapeY[2] : Dim{1}; // output depth
      auto oHeight = (fDim > 1) ? fShapeY[fDim] : Dim{1}; // ouput height
      auto oWidth = fShapeY[fDim+1]; // output width
      // total output size for a channel
      auto outputChannelStride = ConvertDimShapeToLength(std::vector<Dim>{oDepth, oHeight, oWidth}); // size of channel = D * H * W
      auto outputBatchStride = ConvertDimShapeToLength(std::vector<Dim>{fShapeY[1] , oDepth, oHeight, oWidth}); // size of C * D * H * W
      // input size
      // NOTE(review): the definition of `inputChannelStride` (original line 366,
      // used below in the group-convolution branch) was dropped here.
      auto inputBatchStride = ConvertDimShapeToLength(std::vector<Dim>{fShapeX[1] , iDepth, iHeight, iWidth}); // size of C * D * H * W

      out << "\n//---- operator Conv " << OpName << "\n";

      // vectorize the (dilated)convolution kernels into a matrix
      // no need to transpose the matrix
      // to fix for 1d and 3d

      // axis indices into the attribute vectors for depth/height/width
      size_t id = (fDim > 2) ? fDim-3 : 2;
      size_t ih = (fDim > 1) ? fDim-2 : 1;
      size_t iw = fDim-1;

      // strides inside the compact kernel ("stride") and the dilated kernel ("Dil")
      size_t wstrideDil = fAttrDilations[iw];
      size_t hstride = kWidth;
      size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw]; // stride dilated in the height
      size_t dstride = kHeight * kWidth;
      // NOTE(review): the definition of `dstrideDil` (original line 383) was
      // dropped here.
      size_t icstride = kHeight * kWidth * kDepth;
      // NOTE(review): the definition of `icstrideDil` (original line 385) was
      // dropped here.
      size_t ocstride = fShapeW[1] * icstride;
      size_t ocstrideDil = fShapeW[1] * icstrideDil;

      // generated loops copying tensor_<W> into the dilated kernel matrix tensor_<X>_f
      out << SP << "for (std::size_t oc = 0; oc < " << fShapeW[0] << "; oc++) {\n";
      out << SP << SP << "for (std::size_t ic = 0; ic < " << fShapeW[1] << "; ic++) {\n";
      if (fDim > 2)
         out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";
      if (fDim > 1)
         out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";
      out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";

      out << SP << SP << SP << SP << SP << "tensor_" <<fNX << "_f[oc * "
         << ocstrideDil << " + ic * " << icstrideDil;
      if (fDim > 2) out << " + kd * " << dstrideDil;
      if (fDim > 1) out << " + kh * " << hstrideDil;
      out << " + kw * " << wstrideDil << " ] = tensor_" << fNW << "[oc * " << ocstride << " + ic * " << icstride;
      if (fDim > 2) out << " + kd * " << dstride;
      if (fDim > 1) out << " + kh * " << hstride;
      out << " + kw ];\n";

      out << SP << SP << SP << SP << "}\n";
      if (fDim > 1) out << SP << SP << SP << "}\n";
      if (fDim > 2) out << SP << SP << SP << "}\n";
      out << SP << SP << "}\n";
      out << SP << "}\n";

      // GEMM parameters: Y(channel-major) = xcol^T-layout trick, no transposes needed
      //out << SP << "char " << OpName << "_transA = 'T';\n";
      out << SP << "char " << OpName << "_transA = 'N';\n";
      out << SP << "char " << OpName << "_transB = 'N';\n";
      out << SP << "int " << OpName << "_m = " << outputChannelStride << ";\n"; // output h*w
      assert(fShapeY[1] == fShapeW[0]);
      //assert(fShapeW[1] == fShapeX[1] / fAttrGroup);
      out << SP << "int " << OpName << "_n = " << fShapeW[0] << ";\n"; // output channels
      out << SP << "int " << OpName << "_k = " << fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] << ";\n";
      out << SP << "float " << OpName << "_alpha = 1.0;\n";
      out << SP << "float " << OpName << "_beta = 0.0;\n";


      // Loop on batch size
      out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";

      // IM2COL: Unroll the input tensor
      // order input data as (e.g. kernel 2x2) and (xa,ya) is channel 1 and (xb,yb) is channel 2
      // (xa1,..,xak,ya1,..yak)(xb1,...,xbk,yb1,..,ybk)
      // (xa2,...xak+1,ya1,...yak)(......)
      // trick for speed is using caffe im2col and output a matrix which contains filtered values as rows.
      // By doing this one has consecutive memory reads and writes
      // Resulting matrix op_xcol is (input channels * filter_h * filter_w , output_h * output_w)

      // Im2col only supports symmetric padding: average asymmetric pads with a
      // warning. NOTE(review): this mutates fAttrPads/fAttrStrides during code
      // generation -- Generate() is therefore not idempotent for such models.
      if (fDim ==1) {
         if (fAttrPads[0] != fAttrPads[1] ) {
            std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding "
                      << std::endl;
            fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;
         }
         fAttrPads[1] = 0;
         fAttrStrides[1] = 1;
      }
      if (fDim == 2) {
         if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {
            std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
            fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;
            fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;
         }
      }
      if (fDim == 3) {
         if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {
            std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
            fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;
            fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;
            fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;
         }
      }
      out << SP << SP << "size_t out_offset = n * " << outputBatchStride << ";\n";

      if (fAttrGroup == 1) {
         // standard convolution: one im2col + one GEMM per batch element
         out << SP << SP << "size_t x_offset = n * " << inputBatchStride << ";\n";
         // when using im2col - resulting matrix is transposed, the dimension is (input_c * filter_h * filter_y, output_h *
         // output_w)
         if (fDim < 3) {
            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
                << " + x_offset,"
                // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
                // dilation_w,
                //
                << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
            if (fDim == 1)
               out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
                   << fAttrDilations[0];
            else // dim ==2
               out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
                   << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
                   << fAttrDilations[1];
            out << "," << "tensor_" <<fNX << "_xcol);\n\n ";
         } else {
            // 3d im2col
            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
                << " + x_offset,"
                // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
                // dilation_d, dilation_h, dilation_w,
                //
                << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << ","
                << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << ","
                << fAttrPads[0] << "," << fAttrPads[1] << "," << fAttrPads[2] << ","
                << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
                << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ","
                << "tensor_" << fNX << "_xcol);\n\n ";
         }
         // BLAS
         out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
             << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, &" << OpName
             << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
         out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
             << " + out_offset, &" << OpName << "_m);\n";
      } else {
         // case of group convolution
         // Unroll (IM2COL) the input tensor- make loop on groups and repeat operations (IM2COL + GEMM for each
         // group)
         // out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
         out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";
         // NOTE(review): `inputChannelStride` used here has no surviving
         // definition (see the dropped original line 366 above).
         out << SP << SP << "size_t x_offset = n * " << inputBatchStride << " + g * "
             << fShapeW[1] << " * " << inputChannelStride << ";\n ";
         out << SP << SP << "size_t out_offset = n * " << outputBatchStride << " + g * "
             << fShapeW[0] << " * (" << outputChannelStride << ") / " << fAttrGroup << ";\n ";

         if (fDim < 3) {
            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
                << " + x_offset,"
                // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
                // dilation_w,
                //
                << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
            if (fDim == 1)
               out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
                   << fAttrDilations[0];
            else // dim ==2
               out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
                   << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
                   << fAttrDilations[1];
            out << ", tensor_" << fNX << "_xcol);\n\n ";
         } else {
            // 3d im2col
            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
                << " + x_offset,"
                // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
                // dilation_d, dilation_h, dilation_w,
                //
                << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << "," << fAttrKernelShape[0] << ","
                << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1]
                << "," << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2]
                << "," << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ",tensor_" << fNX
                << "_xcol);\n\n ";
         }

         // BLAS
         // n must be divided by the number of groups
         out << SP << SP << SP << OpName << "_n = " << fShapeW[0] / fAttrGroup << ";\n";
         // offset g must be g * k * n
         out << SP << SP << SP << "size_t offset_f = g * "
         // NOTE(review): the expression emitted for offset_f (original line 544)
         // was dropped by the extraction.
             << ";\n";
         out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
             << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName
             << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
         out << SP << SP << SP << "tensor_" << fNX << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
             << " + out_offset"
             << ", &" << OpName << "_m);\n";

         out << SP << SP << "}\n"; // end of group loop
      }

      // add the broadcast bias (tensor_<fNB2>) with a single saxpy per batch element
      if (fNB2 != "") {
         out << SP << "int " << OpName << "_size = " << outputBatchStride << ";\n";
         out << SP << "float " << OpName << "_gamma = 1.0;\n";
         out << SP << "int " << OpName << "_incx = 1;\n";
         out << SP << "int " << OpName << "_incy = 1;\n";

         out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &"
             << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n";

      }
      out << SP << "}\n"; // end of batch size loop

      return out.str();
   }
570
571 /*! \brief Returns the blas routines needed to compile the generated code
572 */
573 std::vector<std::string> GetBlasRoutines() override { return { std::string("Gemm"), std::string("Axpy") }; }
574};
575
576} // namespace SOFIE
577} // namespace Experimental
578} // namespace TMVA
579
580#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
std::vector< size_t > GetTensorShape(const std::string &name) const
Definition RModel.cxx:29
std::vector< Dim > GetDimTensorShape(const std::string &name) const
Definition RModel.cxx:65
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< Dim > dim_shape)
Definition RModel.cxx:247
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:122
std::shared_ptr< void > GetInitializedTensorData(std::string tensor_name)
Definition RModel.cxx:312
ETensorType GetTensorType(std::string name) const
Definition RModel.cxx:90
void UpdateInitializedTensor(std::string tensor_name, ETensorType type, std::vector< std::size_t > shape, std::shared_ptr< void > data)
Definition RModel.cxx:303
std::string Generate(std::string OpName) override
ROperator_Conv(std::string autopad, std::vector< size_t > dilations, size_t group, std::vector< size_t > kernelShape, std::vector< size_t > pads, std::vector< size_t > strides, std::string nameX, std::string nameW, std::string nameB, std::string nameY)
std::vector< std::string > GetBlasRoutines() override
Returns the blas routines needed to compile the generated code.
void Initialize(RModel &model) override
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
ROperator_Conv(std::string autopad, std::vector< size_t > dilations, size_t group, std::vector< size_t > kernelShape, std::vector< size_t > pads, std::vector< size_t > strides, std::string nameX, std::string nameW, std::string nameY)
std::vector< Dim > DoShapeInference(const std::vector< Dim > &input, const std::vector< size_t > &weight)
std::vector< std::string_view > fInputTensorNames
Definition ROperator.hxx:47
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:42
bool fUseSession
flag to identify if using the session class
Definition ROperator.hxx:43
std::vector< std::string_view > fOutputTensorNames
Definition ROperator.hxx:48
bool AreSameShape(const std::vector< size_t > &, const std::vector< size_t > &)
std::vector< size_t > ConvertShapeToInt(const std::vector< Dim > &shape)
Convert shape based on Dim to integer format.
ETensorType ConvertStringToType(std::string type)
std::string ConvertDimShapeToLength(const std::vector< Dim > &shape)
std::string ConvertShapeToString(const std::vector< size_t > &shape)
create variable transformations