ROOT Reference Guide — ROperator_Conv.hxx (TMVA SOFIE). Source listing of this file follows.
#ifndef TMVA_SOFIE_ROPERATOR_CONV
#define TMVA_SOFIE_ROPERATOR_CONV

#include "TMVA/ROperator.hxx"
#include "TMVA/RModel.hxx"

#include <algorithm>
#include <cassert>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
15namespace TMVA {
16namespace Experimental {
17namespace SOFIE {
18
19template<typename T>
20class ROperator_Conv final : public ROperator
21{
22private:
23 std::string fAttrAutopad;
24 std::vector<size_t> fAttrDilations;
25 size_t fAttrGroup;
26 std::vector<size_t> fAttrKernelShape;
27 std::vector<size_t> fAttrPads;
28 std::vector<size_t> fAttrStrides;
29
30 std::string fNX;
31 std::string fNW;
32 std::string fNB;
33 std::string fNB2; // bias tensor name after broadcasting
34 std::string fNY;
35
36 std::vector<size_t> fShapeX;
37 std::vector<size_t> fShapeW;
38 std::vector<size_t> fShapeB;
39 std::vector<size_t> fShapeY;
40
41 std::string fType;
42
43 size_t fDim; // dimension of the convolution
44
45
46public:
47
49
50 ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
51 size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
52 std::vector<size_t> strides, std::string nameX, std::string nameW,
53 std::string nameB, std::string nameY):
54 fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),
55 fAttrPads(pads), fAttrStrides(strides),
56 fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)),
57 fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY))
58 {
59 if(std::is_same<T, float>::value) {
60 fType = "float";
61 } else {
62 throw
63 std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
64 }
65 }
66
67 ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
68 size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
69 std::vector<size_t> strides, std::string nameX, std::string nameW,
70 std::string nameY):
71 fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),
72 fAttrPads(pads), fAttrStrides(strides),
73 fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNY(UTILITY::Clean_name(nameY))
74 {
75 if(std::is_same<T, float>::value) {
76 fType = "float";
77 } else {
78 throw
79 std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
80 }
81 }
82
83 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) {
84 ETensorType out = input[0];
85 return {out};
86 }
87
88 // funciton returning output shape given input
89 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) {
90 // shape of convolution input has to be (according to ONNX): NxCxHxW
91 // Where N is batch size, C : input channels, H : input height, W = input width
92
93 if (input.size() > 3 ) {
94 throw
95 std::runtime_error("TMVA SOFIE Conv Op Shape inference need 2 or 3 input tensors");
96 }
97 for(size_t i = 0; i < input.size(); i++) {
98 if (input[i].size() -2 != fDim) {
99 throw
100 std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid inputs ");
101 }
102 }
103
104 if (fAttrGroup == 0) {
105 fAttrGroup = input[0][1] / input[1][1];
106 }
107
108 // kernel shape
109 size_t k1 = ((fAttrKernelShape.empty())? input[1][2] : fAttrKernelShape[0]);
110 size_t k2 = (fDim > 1) ? ((fAttrKernelShape.empty()) ? input[1][3] : fAttrKernelShape[1]) : 1;
111 size_t k3 = (fDim > 2) ? ((fAttrKernelShape.empty()) ? input[1][4] : fAttrKernelShape[2]) : 1;
112
113
114 size_t i1 = (fDim > 1) ? ((fDim > 2) ? 3 : 2) : 1;
115 size_t i2 = (fDim > 2) ? 4 : 3;
116 size_t i3 = 5;
117
118 if (fAttrDilations.empty()) {
119 fAttrDilations = {1, 1, 1};
120 }
121 fAttrDilations.resize(3);
122 if (fDim < 3) {
123 fAttrDilations.resize(3, 1);
124 }
125 // Shape of the kernel
126 fAttrKernelShape = {k1 + (fAttrDilations[0] - 1) * (k1 - 1),
127 k2 + (fAttrDilations[1] - 1) * (k2 - 1),
128 k3 + (fAttrDilations[2] - 1) * (k3 - 1)};
129
130 if (fAttrAutopad == "NOTSET") {
131 if (fAttrPads.empty()) {
132 fAttrPads = {1, 1, 1, 1, 1, 1};
133 }
134 } else if (fAttrAutopad == "SAME_UPPER" || fAttrAutopad == "SAME_LOWER") {
135 if (fDim == 1)
137 else if (fDim == 2)
139 else if (fDim == 3)
141 fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[2] / 2};
142 // add extra padding at beginnig or end (depending if SAME_UPPER or SAME_LOWER)
143 // need to check this!
144 if (fAttrKernelShape[0] % 2 == 1) {
145 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[0]++ : fAttrPads[i1]++;
146 }
147 if (fDim > 1 && fAttrKernelShape[1] % 2 == 1) {
148 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[1]++ : fAttrPads[i2]++;
149 }
150 if (fDim > 2 && fAttrKernelShape[2] % 2 == 1) {
151 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[2]++ : fAttrPads[i3]++;
152 }
153 } else if (fAttrAutopad != "VALID") {
154 throw
155 std::runtime_error("TMVA SOFIE Conv Op invalid fAutopad");
156 }
157 // to be sure pad is vector of size 6
158 if (fDim < 3) fAttrPads.resize(6, 0);
159
160 if (fAttrStrides.empty()) {
161 fAttrStrides = {1, 1, 1};
162 }
163 if (fDim < 3)
164 fAttrStrides.resize(3, 1);
165
166
167 size_t input1 = input[0][2];
168 size_t input2 = (fDim > 1) ? input[0][3] : 1;
169 size_t input3 = (fDim > 2) ? input[0][4] : 1;
170
171 size_t pad1 = fAttrPads[0] + fAttrPads[i1];
172 size_t output1 = (input1 + pad1 - fAttrKernelShape[0]) / fAttrStrides[0] + 1;
173
174 size_t batch_size = input[0][0]; // first element in input tensor
175 size_t output_channels = input[1][0]; // first element in weight tensor
176
177 std::vector<std::vector<size_t>> ret({{batch_size, output_channels, output1 }});
178
179 if (fDim == 1)
180 return ret;
181
182 size_t pad2 = fAttrPads[1] + fAttrPads[i2];
183 size_t output2 = (input2 + pad2 - fAttrKernelShape[1]) / fAttrStrides[1] + 1;
184 // output is N x M x OH x OW
185 ret[0].push_back(output2);
186 if (fDim == 2)
187 return ret;
188
189 size_t pad3 = fAttrPads[2] + fAttrPads[i3];
190 size_t output3 = (input3 + pad3 - fAttrKernelShape[2] ) / fAttrStrides[2] + 1;
191
192 // output is N x M x OH x OW x OD
193 ret[0].push_back(output3);
194 return ret;
195 }
196
197 void Initialize(RModel& model) {
198 fUseSession = model.UseSession();
199 if (!model.CheckIfTensorAlreadyExist(fNX)) {
200 throw
201 std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model");
202 }
203 fShapeX = model.GetTensorShape(fNX);
204 if (fShapeX.size() < 3 || fShapeX.size() > 5) {
205 std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl;
206 throw
207 std::runtime_error("TMVA SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions");
208 }
209 fDim = fShapeX.size() - 2;
210 if (!model.CheckIfTensorAlreadyExist(fNW)) {
211 throw
212 std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model");
213 }
214 fShapeW = model.GetTensorShape(fNW);
215 if (fShapeW.size() < 3 || fShapeW.size() > 5) {
216 std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;
217 throw std::runtime_error("TMVA SOFIE Conv Op input weight tensor" + fNW + " is not of 3,4 or 5 dimensions");
218 }
221 if (fNB != "") {
222 if (!model.CheckIfTensorAlreadyExist(fNB)) {
223 throw
224 std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model");
225 }
226 fShapeB = model.GetTensorShape(fNB);
227 bool broadcast_needed = (fShapeB.size() != fShapeY.size());
228 if (broadcast_needed) {
229 auto original_data = model.GetInitializedTensorData(fNB);
230 // make bias shape equal to Y shape by adding 1
231 if (fShapeB.size() < 1)
232 throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape");
233 // we assume bias tensor dimension is equal to number of filters that is the second dimension in
234 // the output tensor
235 if (fShapeB[0] != fShapeY[1])
236 throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " +
238 if (fType != "float")
239 throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported");
240
241 // here the acual broadcasting
242 if (!fUseSession) {
243
244 fShapeB.resize(fShapeY.size(), 1.);
245
246 std::shared_ptr<void> new_data_ptr(
247 UTILITY::Unidirectional_broadcast<float>(static_cast<float *>(original_data.get()), fShapeB, fShapeY),
248 std::default_delete<float[]>());
249 model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), fShapeY, new_data_ptr);
250 fShapeB = model.GetTensorShape(fNB);
251 fNB2 = fNB; // use same name
252 }
253 else {
254 // In case of session add broadcasting code in Session constructor and in GenerateInitCode
255 // we need to add a new intermediate tensor for broadcasted bias tensor
256 fNB2 = fNB + "bcast";
258 }
259 }
260 }
261 }
262
263 std::string GenerateInitCode() {
264
265 size_t oDepth = (fDim > 2) ? fShapeY[2] : 1; // output depth
266 size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1; // ouput height
267 size_t oWidth = fShapeY[fDim+1]; // output width
268
269 std::stringstream out;
270 // generate initialization code for broadcasting of bias tensor
271 if (fShapeB.size() != fShapeY.size() && !fNB2.empty() ) {
272 // include a separate scope to avoid defining unique operator temp variables
273 out << " {\n";
274 out << " std::vector<size_t> oldShape = " << ConvertShapeToString(fShapeB) << ";\n";
275 out << " std::vector<size_t> newShape = { " << fShapeY[1] << ", ";
276 if (fDim > 2) out << oDepth << ", ";
277 if (fDim > 1) out << oHeight << ", ";
278 out << oWidth << "};\n";
279 out << " oldShape.resize(newShape.size(), 1.);\n";
280 std::string original_bias_tensor = "tensor_" + fNB;
281 std::string new_bias_tensor = "tensor_" + fNB2;
282 out << " float * newData_ptr = TMVA::Experimental::SOFIE::UTILITY::Unidirectional_broadcast<float>("
283 << original_bias_tensor << ", oldShape, newShape);\n";
284 // extend the new broadcasted bias tensor for the batch dimension
285 int length = fShapeY[1]*oDepth*oHeight*oWidth; // output nc*h*w
286 out << " for (int i = 0; i < " << fShapeY[0] << " ; i++)\n";
287 out << " std::copy(newData_ptr, newData_ptr + " << length << ", "
288 << new_bias_tensor << " + i * " << length << ");\n";
289 out << " delete [] newData_ptr;\n";
290 out << " }\n";
291 }
292 return out.str();
293 }
294
295 // generate code for Session data members (e.g. internal vectors)
296 virtual std::string GenerateSessionMembersCode(std::string opName) {
297
298 size_t outputChannelSize = fShapeY[2]; // size/chanhel = D * H * W
299 size_t kernelSize = fAttrKernelShape[0];
300 for (size_t i = 1; i < fDim; i++) {
301 outputChannelSize *= fShapeY[2 + i];
302 kernelSize *= fAttrKernelShape[i];
303 }
304
305 opName = "op_" + opName;
306 std::stringstream out;
307 // matrix with convolution kernels
308 out << "std::vector<" << fType << "> fVec_" << opName << "_f = std::vector<" << fType << ">("
309 << fShapeW[0] * fShapeW[1] * kernelSize << ");\n";
310 // output matrix of im2col
311 out << "std::vector<" << fType << "> fVec_" << opName << "_xcol = std::vector<" << fType << ">("
312 << fShapeW[1] * kernelSize * outputChannelSize << ");\n";
313 out << "\n";
314
315 return out.str();
316 }
317
318 std::string Generate(std::string OpName) {
319 OpName = "op_" + OpName;
320
321 if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {
322 throw
323 std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first");
324 }
325
326 std::stringstream out;
327 size_t bsize = fShapeX[0];
328 size_t kDepth = (fDim > 2) ? fShapeW[2] : 1; // kernel depth
329 size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; // kernel height
330 size_t kWidth = fShapeW[fDim+1]; // kernel width
331 size_t iDepth = (fDim > 2) ? fShapeX[2] : 1; // input depth
332 size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1; // input height
333 size_t iWidth = fShapeX[fDim+1]; // input width
334 size_t oDepth = (fDim > 2) ? fShapeY[2] : 1; // output depth
335 size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1; // ouput height
336 size_t oWidth = fShapeY[fDim+1]; // output width
337
338 out << "\n//---- operator Conv " << OpName << "\n";
339
340 // create first matrix with convolution kernels
341 if (fUseSession)
342 out << SP << fType << " * " << OpName << "_f = fVec_" << OpName << "_f.data();\n";
343 else
344 out << SP << fType << " " << OpName << "_f[" << fShapeW[0] * fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] << "] = {0};\n";
345
346 // vectorize the (dilated)convolution kernels into a matrix
347 // no need to transpose the matrix
348 // to fix for 1d and 3d
349
350 size_t id = (fDim > 2) ? fDim-3 : 2;
351 size_t ih = (fDim > 1) ? fDim-2 : 1;
352 size_t iw = fDim-1;
353
354 size_t wstrideDil = fAttrDilations[iw];
355 size_t hstride = kWidth;
356 size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw]; // stride dilated in the height
357 size_t dstride = kHeight * kWidth;
358 size_t dstrideDil = fAttrDilations[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw];
359 size_t icstride = kHeight * kWidth * kDepth;
360 size_t icstrideDil = fAttrKernelShape[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw];
361 size_t ocstride = fShapeW[1] * icstride;
362 size_t ocstrideDil = fShapeW[1] * icstrideDil;
363
364 out << SP << "for (std::size_t oc = 0; oc < " << fShapeW[0] << "; oc++) {\n";
365 out << SP << SP << "for (std::size_t ic = 0; ic < " << fShapeW[1] << "; ic++) {\n";
366 if (fDim > 2)
367 out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";
368 if (fDim > 1)
369 out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";
370 out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";
371
372 out << SP << SP << SP << SP << SP << OpName << "_f[oc * "
373 << ocstrideDil << " + ic * " << icstrideDil;
374 if (fDim > 2) out << " + kd * " << dstrideDil;
375 if (fDim > 1) out << " + kh * " << hstrideDil;
376 out << " + kw * " << wstrideDil << " ] = tensor_" << fNW << "[oc * " << ocstride << " + ic * " << icstride;
377 if (fDim > 2) out << " + kd * " << dstride;
378 if (fDim > 1) out << " + kh * " << hstride;
379 out << " + kw ];\n";
380
381 out << SP << SP << SP << SP << "}\n";
382 if (fDim > 1) out << SP << SP << SP << "}\n";
383 if (fDim > 2) out << SP << SP << SP << "}\n";
384 out << SP << SP << "}\n";
385 out << SP << "}\n";
386
387 //out << SP << "char " << OpName << "_transA = 'T';\n";
388 out << SP << "char " << OpName << "_transA = 'N';\n";
389 out << SP << "char " << OpName << "_transB = 'N';\n";
390 out << SP << "int " << OpName << "_m = " << oHeight * oWidth * oDepth << ";\n"; // output h*w
391 assert(fShapeY[1] == fShapeW[0]);
392 assert(fShapeW[1] == fShapeX[1] / fAttrGroup);
393 out << SP << "int " << OpName << "_n = " << fShapeW[0] << ";\n"; // output channels
394 out << SP << "int " << OpName << "_k = " << fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] << ";\n";
395 out << SP << "float " << OpName << "_alpha = 1.0;\n";
396 out << SP << "float " << OpName << "_beta = 0.0;\n";
397
398 if (fUseSession) {
399 out << SP << fType << " * " << OpName << "_xcol = fVec_" << OpName << "_xcol.data();\n";
400 }
401 else {
402 out << SP << fType << " " << OpName << "_xcol["
403 << fShapeX[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] * oDepth * oHeight * oWidth
404 << "] = {0};\n";
405 }
406
407 // Loop on batch size
408 out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";
409
410 // IM2COL: Unroll the input tensor
411 // order input data as (e.g. kernel 2x2) and (xa,ya) is channel 1 and (xb,yb) is channel 2
412 // (xa1,..,xak,ya1,..yak)(xb1,...,xbk,yb1,..,ybk)
413 // (xa2,...xak+1,ya1,...yak)(......)
414 // trick for speed is using caffe im2col and output a matrix which contains filtered values as rows.
415 // By doing this one has consecutive memory reads and writes
416 // Resulting matrix op_xcol is (input channels * filter_h * filter_w , output_h * output_w)
417 if (fDim ==1) {
418 if (fAttrPads[0] != fAttrPads[1] ) {
419 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding "
420 << std::endl;
421 fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;
422 }
423 fAttrPads[1] = 0;
424 fAttrStrides[1] = 1;
425 }
426 if (fDim == 2) {
427 if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {
428 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
429 fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;
430 fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;
431 }
432 }
433 if (fDim == 3) {
434 if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {
435 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
436 fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;
437 fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;
438 fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;
439 }
440 }
441 if (fAttrGroup == 1) {
442 out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iHeight * iWidth << ";\n";
443 out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oHeight * oWidth << ";\n";
444 // when using im2col - resulting matrix is transposed, is (input_c * filter_h * filter_y, output_h *
445 // output_w)
446 if (fDim < 3) {
447 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
448 << " + x_offset,"
449 // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
450 // dilation_w,
451 //
452 << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
453 if (fDim == 1)
454 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
455 << fAttrDilations[0];
456 else // dim ==2
457 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
458 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
459 << fAttrDilations[1];
460 out << "," << OpName << "_xcol);\n\n ";
461 } else {
462 // 3d im2col
463 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
464 << " + x_offset,"
465 // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
466 // dilation_d, dilation_h, dilation_w,
467 //
468 << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << ","
469 << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << ","
470 << fAttrPads[0] << "," << fAttrPads[1] << "," << fAttrPads[2] << ","
471 << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
472 << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ","
473 << OpName << "_xcol);\n\n ";
474 }
475 // BLAS
476 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
477 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << OpName << "_xcol, &" << OpName
478 << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
479 out << SP << SP << SP << OpName << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
480 << " + out_offset, &" << OpName << "_m);\n";
481 } else {
482 // case of group convolution
483 // Unroll (IM2COL) the input tensor- make loop on groups and repeat operations (IM2COL + GEMM for each
484 // group)
485 out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
486 out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";
487 out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iDepth * iHeight * iWidth << " + g * "
488 << fShapeW[1] * iDepth * iHeight * iWidth << ";\n ";
489 out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << " + g * "
490 << fShapeW[0] * oDepth * oHeight * oWidth / fAttrGroup << ";\n ";
491
492 if (fDim < 3) {
493 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
494 << " + x_offset,"
495 // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
496 // dilation_w,
497 //
498 << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
499 if (fDim == 1)
500 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
501 << fAttrDilations[0];
502 else // dim ==2
503 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
504 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
505 << fAttrDilations[1];
506 out << "," << OpName << "_xcol);\n\n ";
507 } else {
508 // 3d im2col
509 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
510 << " + x_offset,"
511 // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
512 // dilation_d, dilation_h, dilation_w,
513 //
514 << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << "," << fAttrKernelShape[0] << ","
515 << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1]
516 << "," << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2]
517 << "," << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << "," << OpName
518 << "_xcol);\n\n ";
519 }
520
521 // BLAS
522 // n must be divided by the number of groups
523 out << SP << SP << SP << OpName << "_n = " << fShapeW[0] / fAttrGroup << ";\n";
524 // offset g must be g * k * n
525 out << SP << SP << SP << "size_t offset_f = g * "
527 << ";\n";
528 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
529 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << OpName << "_xcol, &" << OpName
530 << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
531 out << SP << SP << SP << OpName << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
532 << " + out_offset"
533 << ", &" << OpName << "_m);\n";
534
535 out << SP << SP << "}\n"; // end of group loop
536 }
537
538 out << SP << "}\n"; // end of batch size loop
539
540
541 if (fNB2 != "") {
542 out << SP << "int " << OpName << "_size = " << fShapeY[0] * fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
543 out << SP << "float " << OpName << "_gamma = 1.0;\n";
544 out << SP << "int " << OpName << "_incx = 1;\n";
545 out << SP << "int " << OpName << "_incy = 1;\n";
546
547 out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &"
548 << OpName << "_incx, tensor_" << fNY << ", &" << OpName << "_incy);\n";
549
550 }
551
552
553 return out.str();
554 }
555};
556
557} // namespace SOFIE
558} // namespace Experimental
559} // namespace TMVA
560
561#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
XFontStruct * id
Definition TGX11.cxx:109
const ETensorType & GetTensorType(std::string name)
Definition RModel.cxx:70
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< std::size_t > shape)
Definition RModel.cxx:136
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:91
const std::vector< size_t > & GetTensorShape(std::string name)
Definition RModel.cxx:49
std::shared_ptr< void > GetInitializedTensorData(std::string tensor_name)
Definition RModel.cxx:160
void UpdateInitializedTensor(std::string tensor_name, ETensorType type, std::vector< std::size_t > shape, std::shared_ptr< void > data)
Definition RModel.cxx:151
virtual std::string GenerateSessionMembersCode(std::string opName)
std::string Generate(std::string OpName)
ROperator_Conv(std::string autopad, std::vector< size_t > dilations, size_t group, std::vector< size_t > kernelShape, std::vector< size_t > pads, std::vector< size_t > strides, std::string nameX, std::string nameW, std::string nameB, std::string nameY)
ROperator_Conv(std::string autopad, std::vector< size_t > dilations, size_t group, std::vector< size_t > kernelShape, std::vector< size_t > pads, std::vector< size_t > strides, std::string nameX, std::string nameW, std::string nameY)
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input)
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input)
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:39
bool fUseSession
flag to identify if using the session class
Definition ROperator.hxx:40
std::string ConvertShapeToString(std::vector< size_t > shape)
create variable transformations