doc/hackathon/ROperator__Conv_8hxx_source.html

#ifndef TMVA_SOFIE_ROPERATOR_CONV

#define TMVA_SOFIE_ROPERATOR_CONV


#include "TMVA/SOFIE_common.hxx"

#include "TMVA/ROperator.hxx"

#include "TMVA/RModel.hxx"


#include <memory>

#include <sstream>

#include <algorithm>

#include <stdexcept>

#include <vector>

#include <cassert>


namespace TMVA {

namespace Experimental {

namespace SOFIE {


template<typename T>


class ROperator_Conv final : public ROperator

{

private:

   bool fBroadcastBias = false;


   std::string fAttrAutopad;

   std::vector<size_t> fAttrDilations;

   size_t fAttrGroup;

   std::vector<size_t> fAttrKernelShape;

   std::vector<size_t> fAttrPads;

   std::vector<size_t> fAttrStrides;


   std::string fNX;

   std::string fNW;

   std::string fNB;

   std::string fNY;


   std::string convK;

   std::string imcol;


   std::vector<Dim> fShapeX;

   std::vector<size_t> fShapeW;

   std::vector<size_t> fShapeB;

   std::vector<Dim> fShapeY;


   std::string fType;


   size_t fDim;   // dimension of the convolution


public:


   ROperator_Conv() {}


   ROperator_Conv(std::string autopad, std::vector<size_t> dilations,

      size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,

      std::vector<size_t> strides, std::string nameX, std::string nameW,

      std::string nameB, std::string nameY):

      fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),

      fAttrPads(pads), fAttrStrides(strides),

      fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)),

      fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY))

   {

      if(std::is_same<T, float>::value) {

         fType = "float";

      } else {

         throw

            std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");

      }

      fInputTensorNames = { fNX, fNB };

      fOutputTensorNames = { fNY };

   }


   ROperator_Conv(std::string autopad, std::vector<size_t> dilations,

      size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,

      std::vector<size_t> strides, std::string nameX, std::string nameW,

      std::string nameY):

      fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),

      fAttrPads(pads), fAttrStrides(strides),

      fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNY(UTILITY::Clean_name(nameY))

   {

      if(std::is_same<T, float>::value) {

         fType = "float";

      } else {

         throw

            std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");

      }

      fInputTensorNames = { fNX };

      fOutputTensorNames = { fNY };

   }


   std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {

      ETensorType out = input[0];

      return {out};

   }


   // function returning output shape given input


   std::vector<Dim> DoShapeInference(const std::vector<Dim> & input, const std::vector<size_t> & weight) {

      // shape of convolution input has to be (according to ONNX): N x C x H x W

      // Where N : batch size, C : input  channels, H : input height, W : input width


      if (input.size() -2 != fDim) {

         throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid input ");

      }

      if (weight.size() -2 != fDim) {

         throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid weights ");

      }

      if (fAttrGroup == 0 && input[1].isParam)

         throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without group attr");

      if (fAttrKernelShape.empty()) {

         if (input[2].isParam || (fDim > 1 && input[3].isParam) || (fDim > 2 && input[4].isParam))

            throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without kernel attr");

      }


      if (fAttrGroup == 0) {

         fAttrGroup = input[1].dim / weight[1];

      }


      // kernel shape

      size_t k1 = ((fAttrKernelShape.empty())? weight[2] : fAttrKernelShape[0]);

      size_t k2 = (fDim > 1) ? ((fAttrKernelShape.empty()) ? weight[3] : fAttrKernelShape[1]) : 1;

      size_t k3 = (fDim > 2) ? ((fAttrKernelShape.empty()) ? weight[4] : fAttrKernelShape[2]) : 1;


      size_t i1 = (fDim > 1) ? ((fDim > 2) ? 3 : 2) : 1;

      size_t i2 = (fDim > 2) ? 4 : 3;

      size_t i3 = 5;


      if (fAttrDilations.empty()) {

         fAttrDilations = {1, 1, 1};

      }

      fAttrDilations.resize(3);

      if (fDim < 3) {

         fAttrDilations.resize(3, 1);

      }

      // Shape of the kernel

      fAttrKernelShape = {k1 + (fAttrDilations[0] - 1) * (k1 - 1),

                          k2 + (fAttrDilations[1] - 1) * (k2 - 1),

                          k3 + (fAttrDilations[2] - 1) * (k3 - 1)};


      if (fAttrStrides.empty()) {

         fAttrStrides = {1, 1, 1};

      }

      if (fDim < 3)

         fAttrStrides.resize(3, 1);


      if (fAttrAutopad == "NOTSET") {

         if (fAttrPads.empty()) {

            fAttrPads = {1, 1, 1, 1, 1, 1};

         }

      } else if (fAttrAutopad == "SAME_UPPER" || fAttrAutopad == "SAME_LOWER") {

         for (size_t d = 0; d < fDim; ++d) {

            if (input[d + 2].isParam)

               throw std::runtime_error(

                  "TMVA SOFIE Conv Op: SAME padding with parametric input shape is not supported");

         }

         // ONNX SAME padding: total_pad = max(0, (ceil(in/stride)-1)*stride + kernel - in)

         // SAME_UPPER places extra padding at end, SAME_LOWER at beginning

         fAttrPads.assign(6, 0);

         for (size_t d = 0; d < fDim; ++d) {

            size_t inSize = input[d + 2].dim;

            size_t stride_d = fAttrStrides[d];

            size_t outSize = (inSize + stride_d - 1) / stride_d;

            int totalPad = std::max(0, (int)((outSize - 1) * stride_d + fAttrKernelShape[d]) - (int)inSize);

            if (fAttrAutopad == "SAME_UPPER") {

               fAttrPads[d] = (size_t)(totalPad / 2);

               fAttrPads[d + fDim] = (size_t)(totalPad - totalPad / 2);

            } else {

               fAttrPads[d] = (size_t)(totalPad - totalPad / 2);

               fAttrPads[d + fDim] = (size_t)(totalPad / 2);

            }

         }

      } else if (fAttrAutopad != "VALID") {

         throw

            std::runtime_error("TMVA SOFIE Conv Op invalid fAutopad");

      }

      // to be sure pad is vector of size 6

      if (fDim < 3) fAttrPads.resize(6, 0);


      Dim input1 = input[2];

      Dim input2 = (fDim > 1) ? input[3] : Dim{1};

      Dim input3 = (fDim > 2) ? input[4] : Dim{1};


      size_t pad1 = fAttrPads[0] + fAttrPads[i1];


      // function to get output dimension of convolution given input


      auto computeOutput = [&](Dim inputDim, size_t kernel, size_t pad, size_t stride) {

         if (!inputDim.isParam) {

            size_t outSize = (inputDim.dim + pad - kernel) / stride + 1;

            return  Dim{outSize};

         } else {

            if (stride == 1){

               if ((pad - kernel + 1) == 0 )

                  // output is same as input

                  return inputDim;

               else  {

                  int64_t v =  pad - kernel + 1;

                  std::string outStr = "(" + inputDim.param + "+" + std::to_string(v) + ")";

                  return Dim{ outStr, static_cast<size_t>(-1)};

               }

            } else { // general case (stride not 1)

               int64_t v =  pad - kernel;

               std::string outStr =

                  "((" + inputDim.param + "+" + std::to_string(v) + ")/" + std::to_string(stride) + "+1)";

               return Dim{ outStr, static_cast<size_t>(-1)};

            }

         }

         throw std::runtime_error("TMVA SOFIE Conv Op -  invalid values");

         return Dim{};

      };


      Dim output1 = computeOutput(input1, fAttrKernelShape[0], pad1, fAttrStrides[0]);


      Dim batch_size = input[0];        // first element in input tensor

      Dim output_channels = Dim{weight[0]};   // first element in weight tensor


      std::vector<Dim> ret({ batch_size, output_channels, output1 });


      if (fDim == 1)

         return ret;


      size_t pad2 = fAttrPads[1] + fAttrPads[i2];

      Dim output2 = computeOutput(input2, fAttrKernelShape[1], pad2, fAttrStrides[1]);


      // output is N x M x OH x OW

      ret.push_back(output2);

      if (fDim == 2)

         return ret;


      size_t pad3 = fAttrPads[2] + fAttrPads[i3];

      Dim output3 = computeOutput(input3, fAttrKernelShape[2], pad3, fAttrStrides[2]);


      // output is N x M x OH x OW x OD

      ret.push_back(output3);

      return ret;

   }


   void Initialize(RModel& model) override {

      fUseSession = model.UseSession();

      if (!model.CheckIfTensorAlreadyExist(fNX)) {

         throw

            std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model");

      }

      fShapeX = model.GetDimTensorShape(fNX);

      if (fShapeX.size() < 3 || fShapeX.size()  > 5) {

         std::cout << fNX << " : " << ConvertDimShapeToString(fShapeX) << std::endl;

         throw

            std::runtime_error("TMVA SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions");

      }

      fDim = fShapeX.size() - 2;

      if (!model.CheckIfTensorAlreadyExist(fNW)) {

         throw

            std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model");

      }

      fShapeW = model.GetTensorShape(fNW);

      if (fShapeW.size() < 3 || fShapeW.size()  > 5) {

         std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;

         throw std::runtime_error("TMVA SOFIE Conv Op input weight tensor" + fNW + " is not of 3,4 or 5 dimensions");

      }

      fShapeY = DoShapeInference(fShapeX, fShapeW);

      model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);

      if (fNB != "") {

         if (!model.CheckIfTensorAlreadyExist(fNB)) {

            throw

               std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model");

         }

         fShapeB = model.GetTensorShape(fNB);

         if (fShapeB.size() != 1)

            throw

               std::runtime_error("TMVA SOFIE Conv op : invalid shape for Bias tensor (is not 1D)");

         std::vector<Dim> targetShape(fShapeY.begin() + 1, fShapeY.end());

         auto shapeDimB = model.GetDimTensorShape(fNB);

         bool broadcast_needed = !UTILITY::AreSameShape(shapeDimB, targetShape);

         if (broadcast_needed) {

            auto original_data = model.GetInitializedTensorData(fNB);

            // make bias shape equal to Y shape by adding 1

            if (fShapeB.size() < 1)

               throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape");

            // we assume bias tensor dimension is equal to number of filters that is the second dimension in

            // the output tensor

            if (!(shapeDimB[0] == fShapeY[1]))

               throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " +

                                           ConvertShapeToString(fShapeB));

            if (fType != "float")

               throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported");

            // here is the actual broadcasting

            fBroadcastBias = true;

            if (!fUseSession) {

               // do here broadcasting

               std::vector<size_t> shape(fDim + 1, 1);

               shape[0] = fShapeB[0];

               auto intTargetShape = ConvertShapeToInt(targetShape);

               std::shared_ptr<void> new_data_ptr(

                  UTILITY::UnidirectionalBroadcast(static_cast<float *>(original_data.get()), shape, intTargetShape),

                  std::default_delete<float[]>());

               model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), intTargetShape, new_data_ptr);

               fShapeB = model.GetTensorShape(fNB);

            }

         }

      }

      // output channel size can be parametric and is an expression

      std::vector<Dim> outputDims = std::vector<Dim>(fShapeY.begin()+2, fShapeY.end());

      //check if shape is not parametric

      std::vector<size_t> outputInts = ConvertShapeToInt(outputDims);

      Dim channelDim;

      if (outputInts.empty()) {

         auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W

         channelDim = Dim{ outputChannelSize, static_cast<size_t>(-1)};

      } else {

         size_t outputChannelSize = ConvertShapeToLength(outputInts);

         channelDim = Dim{ outputChannelSize };

      }

      size_t kernelSize = fAttrKernelShape[0];

      for (size_t i = 1; i < fDim; i++) {

         kernelSize *= fAttrKernelShape[i];

      }


      std::vector<size_t> shape1 = {fShapeW[0], fShapeW[1], kernelSize};

      std::vector<Dim> shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, channelDim };

      model.AddIntermediateTensor(fNX +"_f", ConvertStringToType(fType), shape1 );

      model.AddIntermediateTensor(fNX +"_xcol", ConvertStringToType(fType), shape2 );

      convK = fNX +"_f";

      imcol = fNX +"_xcol";

      fOutputTensorNames.emplace_back(convK);

      fOutputTensorNames.emplace_back(imcol);

      fInputTensorNames.emplace_back(convK);

      fInputTensorNames.emplace_back(imcol);


      if (model.Verbose()) {

         std::cout << "Conv - " << fDim << "  " << fNX << " : " << ConvertDimShapeToString(fShapeX)

                  << " --> " << fNY << " : " << ConvertDimShapeToString(fShapeY) << std::endl;

      }

   }


   std::string GenerateInitCode() override {

      std::stringstream out;

      // Generate initialization code for broadcasting of bias tensor

      if (fBroadcastBias) {

         // include a separate scope to avoid defining unique operator temp variables

         std::vector<size_t> shape(fDim + 1, 1);

         // bias (is a 1D tensor)

         shape[0] = fShapeB[0];

         std::vector<Dim> targetShape(fShapeY.begin() + 1, fShapeY.end());

         out << "//--- broadcast bias tensor " << fNB << "for Conv op if needed \n";

         // in case of dynamic tensors check needs to be done at run time

         bool isOutDynamic = ConvertShapeToInt(targetShape).empty();

         auto length = ConvertDimShapeToLength(targetShape);

         if (isOutDynamic)

            out << SP << "if (" << length << " > " << ConvertShapeToLength(shape) << ") {\n";

         else

            out << SP << "{\n";

         out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_"

             << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertDimShapeToString(fShapeY) << ");\n";

         out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n";

         out << SP << SP << "std::copy(data, data + " << length << ", fTensor_" << fNB << ".begin());\n";

         out << SP << SP << "tensor_" << fNB << " = fTensor_" << fNB << ".data();\n";

         out << SP << SP << "delete[] data;\n";

         out << SP << "}\n";

      }

      return out.str();

   }


   std::string Generate(std::string OpName) override {

      OpName = "op_" + OpName;


      if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {

         throw

            std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first");

      }


      std::stringstream out;

      auto bsize = fShapeX[0];

      size_t kDepth = (fDim > 2) ?  fShapeW[2] : 1;  // kernel depth

      size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1;  // kernel height

      size_t kWidth = fShapeW[fDim+1]; // kernel width

      auto iDepth = (fDim > 2) ?  fShapeX[2] : Dim{1};  // input depth

      auto iHeight = (fDim > 1) ? fShapeX[fDim] : Dim{1}; // input height

      auto iWidth = fShapeX[fDim+1]; // input width

      auto oDepth = (fDim > 2) ? fShapeY[2] : Dim{1}; // output depth

      auto oHeight = (fDim > 1) ? fShapeY[fDim] : Dim{1};  // ouput height

      auto oWidth = fShapeY[fDim+1]; // output width

      // total output size for a channel

      auto outputChannelStride = ConvertDimShapeToLength(std::vector<Dim>{oDepth, oHeight, oWidth}); // size of channel = D * H * W

      auto outputBatchStride =  ConvertDimShapeToLength(std::vector<Dim>{fShapeY[1] , oDepth, oHeight, oWidth}); // size of C * D * H * W

      // input size

      auto inputChannelStride = ConvertDimShapeToLength(std::vector<Dim>{iDepth, iHeight, iWidth});

      auto inputBatchStride =  ConvertDimShapeToLength(std::vector<Dim>{fShapeX[1] , iDepth, iHeight, iWidth}); // size of C * D * H * W


      out << "\n//----  operator Conv " << OpName << "\n";


      // vectorize the (dilated)convolution kernels into a matrix

      // no need to transpose the matrix

      // to fix for 1d and 3d


      size_t id = (fDim > 2) ? fDim-3 : 2;

      size_t ih = (fDim > 1) ? fDim-2 : 1;

      size_t iw = fDim-1;


      size_t wstrideDil = fAttrDilations[iw];

      size_t hstride = kWidth;

      size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw];  // stride dilated in the height

      size_t dstride = kHeight * kWidth;

      size_t dstrideDil = fAttrDilations[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw];

      size_t icstride = kHeight * kWidth * kDepth;

      size_t icstrideDil = fAttrKernelShape[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw];

      size_t ocstride = fShapeW[1] * icstride;

      size_t ocstrideDil = fShapeW[1] * icstrideDil;


      out << SP << "for (std::size_t oc = 0; oc < " << fShapeW[0] << "; oc++) {\n";

      out << SP << SP << "for (std::size_t ic = 0; ic < " << fShapeW[1] << "; ic++) {\n";

      if (fDim > 2)

         out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";

      if (fDim > 1)

         out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";

      out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";


      out << SP << SP << SP << SP << SP << "tensor_" <<fNX <<  "_f[oc * "

          << ocstrideDil << " + ic * " << icstrideDil;

      if (fDim > 2) out << " + kd * " << dstrideDil;

      if (fDim > 1) out << " + kh * " << hstrideDil;

      out << " + kw * " << wstrideDil  << "  ] = tensor_" << fNW << "[oc * " << ocstride << " + ic * " << icstride;

      if (fDim > 2) out << " + kd * " << dstride;

      if (fDim > 1) out << " + kh * " << hstride;

      out  << " + kw ];\n";


      out << SP << SP << SP << SP << "}\n";

      if (fDim > 1) out << SP << SP << SP << "}\n";

      if (fDim > 2) out << SP << SP << SP << "}\n";

      out << SP << SP << "}\n";

      out << SP << "}\n";


      //out << SP << "char " << OpName << "_transA = 'T';\n";

      out << SP << "char " << OpName << "_transA = 'N';\n";

      out << SP << "char " << OpName << "_transB = 'N';\n";

      out << SP << "int " << OpName << "_m = " << outputChannelStride << ";\n"; // output h*w

      assert(fShapeY[1] == fShapeW[0]);

      //assert(fShapeW[1] == fShapeX[1] / fAttrGroup);

      out << SP << "int " << OpName << "_n = " << fShapeW[0] << ";\n"; // output channels

      out << SP << "int " << OpName << "_k = " << fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] << ";\n";

      out << SP << "float " << OpName << "_alpha = 1.0;\n";

      if (fNB != "")

         out << SP << "float " << OpName << "_beta = 1.0;\n";

      else // when bias is not present beta needs to be equal to zero to avoid re-using previous results in output tensor

         out << SP << "float " << OpName << "_beta = 0.0;\n";


      // Loop on batch size

      out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";


      // IM2COL: Unroll the input tensor

      // order input data as  (e.g. kernel 2x2)  and (xa,ya) is channel 1 and (xb,yb) is channel 2

      //   (xa1,..,xak,ya1,..yak)(xb1,...,xbk,yb1,..,ybk)

      //   (xa2,...xak+1,ya1,...yak)(......)

      // trick for speed is using caffe im2col and output a matrix which contains filtered values as rows.

      // By doing this one has consecutive memory reads and writes

      // Resulting matrix op_xcol is (input channels * filter_h * filter_w , output_h * output_w)

      if (fDim ==1) {

         if (fAttrPads[0] != fAttrPads[1] ) {

            std::cout << "TMVA SOFIE Operator Conv:  asymmetric padding not supported. Assume an average padding "

                      << std::endl;

            fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;

         }

         fAttrPads[1] = 0;

         fAttrStrides[1] = 1;

      }

      if (fDim == 2) {

         if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {

            std::cout << "TMVA SOFIE Operator Conv:  asymmetric padding not supported. Assume an average padding " << std::endl;

            fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;

            fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;

         }

      }

      if (fDim == 3) {

         if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {

            std::cout << "TMVA SOFIE Operator Conv:  asymmetric padding not supported. Assume an average padding " << std::endl;

            fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;

            fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;

            fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;

         }

      }

      out << SP << SP << "size_t out_offset = n * " << outputBatchStride  << ";\n";


      if (fAttrGroup == 1) {

         out << SP << SP << "size_t x_offset = n * " << inputBatchStride << ";\n";

         // when using im2col - resulting matrix is transposed, the dimension is (input_c * filter_h * filter_y,  output_h *

         // output_w)

         if (fDim < 3) {

            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX

                << " + x_offset,"

                //  channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,

                //  dilation_w,

                //

                << fShapeW[1] << "," << iHeight << "," << iWidth << ",";

            if (fDim == 1)

               out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"

                   << fAttrDilations[0];

            else // dim ==2

               out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]

                   << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","

                   << fAttrDilations[1];

            out << "," << "tensor_" <<fNX << "_xcol);\n\n ";

         } else {

            // 3d im2col

            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX

                << " + x_offset,"

                //  channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,

                //  dilation_d, dilation_h, dilation_w,

                //

                << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << ","

                << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << ","

                << fAttrPads[0] << "," << fAttrPads[1] << "," << fAttrPads[2] << ","

                << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","

                << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ","

                << "tensor_" << fNX << "_xcol);\n\n ";

         }

         // BLAS

         out << SP << "TMVA::Experimental::SOFIE::Gemm_Call("

             << "tensor_" << fNY << " + out_offset, false, false, "

             << OpName << "_m, " << OpName << "_n, " << OpName << "_k, "

             << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, tensor_" << fNX << "_f, "

             << OpName << "_beta, ";

         if (fNB != "")

            out << "tensor_" << fNB;

         else

            out << "nullptr";

         out << ");\n";


         // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"

         //     << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, &" << OpName

         //     << "_m,\n"; // use m if op_xcol is not transpose , otherwise k

         // out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY

         //     << " + out_offset, &" << OpName << "_m);\n";

      } else {

         // case of group convolution

         // Unroll (IM2COL) the input tensor- make loop on groups and repeat operations (IM2COL + GEMM for each

         // group)

         // out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";

         out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";

         out << SP << SP << "size_t x_offset = n * " << inputBatchStride << " + g * "

             << fShapeW[1] << " * " << inputChannelStride << ";\n ";

         out << SP << SP << "size_t g_offset = g * " << fShapeW[0] << " * (" << outputChannelStride << ") / " << fAttrGroup << ";\n ";

         out << SP << SP << "size_t out_offset = n * " << outputBatchStride << " + g_offset;\n";


         if (fDim < 3) {

            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX

                << " + x_offset,"

                //  channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,

                //  dilation_w,

                //

                << fShapeW[1] << "," << iHeight << "," << iWidth << ",";

            if (fDim == 1)

               out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"

                   << fAttrDilations[0];

            else // dim ==2

               out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]

                   << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","

                   << fAttrDilations[1];

            out << ", tensor_" << fNX << "_xcol);\n\n ";

         } else {

            // 3d im2col

            out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX

                << " + x_offset,"

                //  channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,

                //  dilation_d, dilation_h, dilation_w,

                //

                << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << "," << fAttrKernelShape[0] << ","

                << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1]

                << "," << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2]

                << "," << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ",tensor_" << fNX

                << "_xcol);\n\n ";

         }


         // BLAS

         // n must be divided by the number of groups

         out << SP << SP << SP << OpName << "_n = " << fShapeW[0] / fAttrGroup << ";\n";

         // offset g must be  g * k * n

         out << SP << SP << SP << "size_t offset_f = g * "

             << fShapeW[0] * fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] / fAttrGroup

             << ";\n";


         out << SP << "TMVA::Experimental::SOFIE::Gemm_Call("

             << "tensor_" << fNY << " + out_offset, false, false, "

             << OpName << "_m, " << OpName << "_n, " << OpName << "_k, "

             << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, tensor_" << fNX << "_f + offset_f, "

             << OpName << "_beta, ";

         if (fNB != "")

            out << "tensor_" << fNB << " + g_offset";

         else

            out << "nullptr";

         out << ");\n";


         // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"

         //     << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName

         //     << "_m,\n"; // use m if op_xcol is not transpose , otherwise k

         // out << SP << SP << SP << "tensor_" << fNX << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY

         //     << " + out_offset"

         //     << ", &" << OpName << "_m);\n";


         out << SP << SP << "}\n"; // end of group loop

      }


      // if (fNB != "") {

      //    out << SP << "int " << OpName << "_size = " << outputBatchStride << ";\n";

      //    out << SP << "float " << OpName << "_gamma = 1.0;\n";

      //    out << SP << "int " << OpName << "_incx = 1;\n";

      //    out << SP << "int " << OpName << "_incy = 1;\n";


      //    out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB << ", &"

      //        << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n";


      // }

      out << SP << "}\n"; // end of batch size loop


      return out.str();

      }


   /*! \brief Returns the blas routines needed to compile the generated code

    */

   std::vector<std::string> GetBlasRoutines() override { return { std::string("Gemm"), std::string("Axpy") }; }

};


} // namespace SOFIE

} // namespace Experimental

} // namespace TMVA


#endif

RModel.hxx

ROperator.hxx

d
#define d(i)
Definition RSha256.hxx:102

ret
char * ret
Definition Rotated.cxx:221

SOFIE_common.hxx

id
XFontStruct * id
Definition TGX11.cxx:147

TMVA::Experimental::SOFIE::RModel
Definition RModel.hxx:12

TMVA::Experimental::SOFIE::ROperator_Conv::fAttrDilations
std::vector< size_t > fAttrDilations
Definition ROperator_Conv.hxx:26

TMVA::Experimental::SOFIE::ROperator_Conv::fBroadcastBias
bool fBroadcastBias
Definition ROperator_Conv.hxx:23

TMVA::Experimental::SOFIE::ROperator_Conv::ROperator_Conv
ROperator_Conv()
Definition ROperator_Conv.hxx:52

TMVA::Experimental::SOFIE::ROperator_Conv::Generate
std::string Generate(std::string OpName) override
Definition ROperator_Conv.hxx:363

TMVA::Experimental::SOFIE::ROperator_Conv::fShapeY
std::vector< Dim > fShapeY
Definition ROperator_Conv.hxx:43

TMVA::Experimental::SOFIE::ROperator_Conv::convK
std::string convK
Definition ROperator_Conv.hxx:37

TMVA::Experimental::SOFIE::ROperator_Conv::imcol
std::string imcol
Definition ROperator_Conv.hxx:38

TMVA::Experimental::SOFIE::ROperator_Conv::fNX
std::string fNX
Definition ROperator_Conv.hxx:32

TMVA::Experimental::SOFIE::ROperator_Conv::fAttrAutopad
std::string fAttrAutopad
Definition ROperator_Conv.hxx:25

TMVA::Experimental::SOFIE::ROperator_Conv::fNW
std::string fNW
Definition ROperator_Conv.hxx:33

TMVA::Experimental::SOFIE::ROperator_Conv::fShapeW
std::vector< size_t > fShapeW
Definition ROperator_Conv.hxx:41

TMVA::Experimental::SOFIE::ROperator_Conv::ROperator_Conv
ROperator_Conv(std::string autopad, std::vector< size_t > dilations, size_t group, std::vector< size_t > kernelShape, std::vector< size_t > pads, std::vector< size_t > strides, std::string nameX, std::string nameW, std::string nameB, std::string nameY)
Definition ROperator_Conv.hxx:54

TMVA::Experimental::SOFIE::ROperator_Conv::GetBlasRoutines
std::vector< std::string > GetBlasRoutines() override
Returns the blas routines needed to compile the generated code.
Definition ROperator_Conv.hxx:620

TMVA::Experimental::SOFIE::ROperator_Conv::fShapeX
std::vector< Dim > fShapeX
Definition ROperator_Conv.hxx:40

TMVA::Experimental::SOFIE::ROperator_Conv::fType
std::string fType
Definition ROperator_Conv.hxx:45

TMVA::Experimental::SOFIE::ROperator_Conv::fDim
size_t fDim
Definition ROperator_Conv.hxx:47

TMVA::Experimental::SOFIE::ROperator_Conv::Initialize
void Initialize(RModel &model) override
Definition ROperator_Conv.hxx:238

TMVA::Experimental::SOFIE::ROperator_Conv::fNB
std::string fNB
Definition ROperator_Conv.hxx:34

TMVA::Experimental::SOFIE::ROperator_Conv::GenerateInitCode
std::string GenerateInitCode() override
Definition ROperator_Conv.hxx:335

TMVA::Experimental::SOFIE::ROperator_Conv::fAttrStrides
std::vector< size_t > fAttrStrides
Definition ROperator_Conv.hxx:30

TMVA::Experimental::SOFIE::ROperator_Conv::fAttrGroup
size_t fAttrGroup
Definition ROperator_Conv.hxx:27

TMVA::Experimental::SOFIE::ROperator_Conv::TypeInference
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
Definition ROperator_Conv.hxx:91

TMVA::Experimental::SOFIE::ROperator_Conv::fShapeB
std::vector< size_t > fShapeB
Definition ROperator_Conv.hxx:42

TMVA::Experimental::SOFIE::ROperator_Conv::fAttrPads
std::vector< size_t > fAttrPads
Definition ROperator_Conv.hxx:29

TMVA::Experimental::SOFIE::ROperator_Conv::ROperator_Conv
ROperator_Conv(std::string autopad, std::vector< size_t > dilations, size_t group, std::vector< size_t > kernelShape, std::vector< size_t > pads, std::vector< size_t > strides, std::string nameX, std::string nameW, std::string nameY)
Definition ROperator_Conv.hxx:73

TMVA::Experimental::SOFIE::ROperator_Conv::DoShapeInference
std::vector< Dim > DoShapeInference(const std::vector< Dim > &input, const std::vector< size_t > &weight)
Definition ROperator_Conv.hxx:97

TMVA::Experimental::SOFIE::ROperator_Conv::fNY
std::string fNY
Definition ROperator_Conv.hxx:35

TMVA::Experimental::SOFIE::ROperator_Conv::fAttrKernelShape
std::vector< size_t > fAttrKernelShape
Definition ROperator_Conv.hxx:28

TMVA::Experimental::SOFIE::ROperator
Definition ROperator.hxx:18

TMVA::Experimental::SOFIE::ROperator::fInputTensorNames
std::vector< std::string_view > fInputTensorNames
Definition ROperator.hxx:50

TMVA::Experimental::SOFIE::ROperator::SP
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:45

TMVA::Experimental::SOFIE::ROperator::fUseSession
bool fUseSession
flag to identify if using the session class
Definition ROperator.hxx:46

TMVA::Experimental::SOFIE::ROperator::fOutputTensorNames
std::vector< std::string_view > fOutputTensorNames
Definition ROperator.hxx:51

TMVA::Experimental::SOFIE::UTILITY
Definition SOFIE_common.hxx:343

TMVA::Experimental::SOFIE::UTILITY::AreSameShape
bool AreSameShape(const std::vector< size_t > &, const std::vector< size_t > &)
Definition SOFIE_common.cxx:202

TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast
T * UnidirectionalBroadcast(const T *data, const std::vector< size_t > &shape, const std::vector< size_t > &targetShape)
Definition SOFIE_common.hxx:484

TMVA::Experimental::SOFIE
Definition RFunction.hxx:12

TMVA::Experimental::SOFIE::ConvertDimShapeToString
std::string ConvertDimShapeToString(const std::vector< Dim > &shape)
Definition SOFIE_common.cxx:136

TMVA::Experimental::SOFIE::ConvertShapeToLength
std::size_t ConvertShapeToLength(const std::vector< size_t > &shape)
Definition SOFIE_common.cxx:54

TMVA::Experimental::SOFIE::ETensorType
ETensorType
Definition SOFIE_common.hxx:28

TMVA::Experimental::SOFIE::ConvertShapeToInt
std::vector< size_t > ConvertShapeToInt(const std::vector< Dim > &shape)
Convert shape based on Dim to integer format.
Definition SOFIE_common.cxx:28

TMVA::Experimental::SOFIE::ConvertStringToType
ETensorType ConvertStringToType(std::string type)
Definition SOFIE_common.cxx:104

TMVA::Experimental::SOFIE::ConvertDimShapeToLength
std::string ConvertDimShapeToLength(const std::vector< Dim > &shape)
Definition SOFIE_common.cxx:147

TMVA::Experimental::SOFIE::ConvertShapeToString
std::string ConvertShapeToString(const std::vector< size_t > &shape)
Definition SOFIE_common.cxx:125

TMVA::Experimental
Definition RFunction.hxx:11

TMVA
create variable transformations
Definition GeneticMinimizer.h:22

v
@ v
Definition rootcling_impl.cxx:3554

TMVA::Experimental::SOFIE::Dim
Definition SOFIE_common.hxx:63

TMVA::Experimental::SOFIE::Dim::isParam
bool isParam
Definition SOFIE_common.hxx:64

TMVA::Experimental::SOFIE::Dim::param
std::string param
Definition SOFIE_common.hxx:66

TMVA::Experimental::SOFIE::Dim::dim
size_t dim
Definition SOFIE_common.hxx:65

group
Definition TWinNTSystem.h:42