1#ifndef TMVA_SOFIE_ROPERATOR_TRANSPOSE
2#define TMVA_SOFIE_ROPERATOR_TRANSPOSE
39 std::vector<ETensorType>
TypeInference(std::vector<ETensorType> input)
override {
43 std::vector<std::vector<size_t>>
ShapeInference(std::vector<std::vector<size_t>> input)
override {
44 if (input.size() > 1)
throw std::runtime_error(
"TMVA SOFIE Tranpose Op Shape Inference only need 1 input tensor");
45 auto& data = input[0];
47 throw std::runtime_error(
"TMVA SOFIE Tranpose Op - Invalid axes attributes");
49 std::vector<size_t> output_shape(
fAttrPerm.size());
50 for (
size_t i = 0; i <
fAttrPerm.size(); i++){
53 std::vector<std::vector<size_t>>
ret;
54 ret.push_back(output_shape);
69 auto inputData =
static_cast<T *
>(model.GetInitializedTensorData(
fNX).get());
71 std::vector<size_t> outputIdx(dim);
72 std::vector<T> outputData(length);
73 for (
size_t i = 0; i < length; i++) {
74 outputIdx[0] = i / outStrides[0];
75 for (
size_t j = 1; j < dim; j++) {
76 outputIdx[j] = (i % outStrides[j - 1]) / outStrides[j];
79 size_t inputIndex = 0;
80 for (
size_t j = 0; j < dim; j++) {
83 inputIndex += outputIdx[k] * inStrides[j];
85 outputData[i] = inputData[inputIndex];
87 model.AddConstantTensor<T>(
fNY, shapeY, outputData.data());
88 if (model.Verbose()) {
95 if (model.CheckIfTensorAlreadyExist(
fNX) ==
false){
96 std::cout<<
"Input tensor for transpose: "<<
fNX<<
'\n';
97 throw std::runtime_error(
"TMVA SOFIE Tranpose Op Input Tensor is not found in model");
102 for (
int i =
fShapeX.size() - 1; i >= 0; i--){
109 throw std::runtime_error(
"TMVA SOFIE Tranpose Op - Invalid axes attributes");
112 for (
size_t i = 0; i <
fAttrPerm.size(); i++){
116 if (model.IsInitializedTensor(
fNX) ) {
117 auto type = model.GetTensorType(
fNX);
132 std::cout <<
"Transpose - no support for initialized tensor of type " <<
ConvertTypeToString(type) << std::endl;
137 model.AddIntermediateTensor(
fNY, model.GetTensorType(
fNX),
fShapeY);
138 if (model.Verbose()) {
143 std::string
Generate(std::string opName)
override {
145 opName =
"op_" + opName;
147 throw std::runtime_error(
"TMVA SOFIE Transpose Op called to Generate without being initialized first");
154 bool isDynamic = (intShapeX.empty() && rank > 0);
156 std::string constQualifier = (isDynamic) ?
"const" :
"constexpr";
158 std::stringstream out;
169 out <<
SP <<
SP <<
"// Pre-baked input strides (row-major)\n";
170 out <<
SP <<
SP << constQualifier <<
" size_t " << opName <<
"_strX[] = {";
171 for (
size_t i = 0; i < rank; ++i)
172 out << stridesX[i] << (i + 1 < rank ?
", " :
"");
175 out <<
SP <<
SP <<
"// Pre-baked output strides (row-major)\n";
176 out <<
SP <<
SP << constQualifier <<
" size_t " << opName <<
"_strY[] = {";
177 for (
size_t i = 0; i < rank; ++i)
178 out << stridesY[i] << (i + 1 < rank ?
", " :
"");
182 bool innerContiguous = (
fAttrPerm.back() == (int64_t) (rank - 1));
183 size_t outerRank = innerContiguous ? rank - 1 : rank;
184 size_t innerSize = innerContiguous ? (isDynamic ? 0 : intShapeX[
fAttrPerm[rank - 1]])
187 if (innerContiguous && !isDynamic && innerSize > 1) {
190 <<
"// Fast path: last permuted axis is contiguous in source\n";
192 <<
"// Inner " << innerSize <<
" elements copied with pointer arithmetic\n";
198 out <<
SP <<
SP <<
SP <<
"size_t src_off = ";
199 for (
size_t i = 0; i < outerRank; ++i) {
200 out <<
"idx_" << i <<
" * " << opName <<
"_strX["
202 if (i + 1 < outerRank) out <<
" + ";
206 out <<
SP <<
SP <<
SP <<
"size_t dst_off = ";
207 for (
size_t i = 0; i < outerRank; ++i) {
208 out <<
"idx_" << i <<
" * " << opName <<
"_strY[" << i <<
"]";
209 if (i + 1 < outerRank) out <<
" + ";
215 <<
"std::copy(tensor_" <<
fNX <<
" + src_off, "
216 <<
"tensor_" <<
fNX <<
" + src_off + " << innerSize <<
", "
217 <<
"tensor_" <<
fNY <<
" + dst_off);\n";
224 out <<
SP <<
SP <<
"// General N-D transpose\n";
229 out <<
SP <<
SP <<
SP <<
"size_t src_idx = ";
230 for (
size_t i = 0; i < rank; ++i) {
231 out <<
"idx_" << i <<
" * " << opName <<
"_strX[" <<
fAttrPerm[i] <<
"]";
232 if (i + 1 < rank) out <<
" + ";
237 out <<
SP <<
SP <<
SP <<
"size_t dst_idx = ";
238 for (
size_t i = 0; i < rank; ++i) {
239 out <<
"idx_" << i <<
" * " << opName <<
"_strY[" << i <<
"]";
240 if (i + 1 < rank) out <<
" + ";
245 <<
"tensor_" <<
fNY <<
"[dst_idx] = "
246 <<
"tensor_" <<
fNX <<
"[src_idx];\n";
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
std::string Generate(std::string opName) override
void Initialize(RModel &model) override
std::vector< Dim > fShapeX
void ProcessInitializedTensor(RModel &model)
ROperator_Transpose(std::vector< int64_t > attr_perm, std::string nameData, std::string nameOutput)
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
std::vector< Dim > fShapeY
std::vector< int64_t > fAttrPerm
std::vector< std::string_view > fInputTensorNames
bool fIsOutputConstant
flag to identify if operator has a constant output (no need to generate code)
const std::string SP
space used to correctly indent the generated C++ code
std::vector< std::string_view > fOutputTensorNames
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
std::string ConvertDimShapeToString(const std::vector< Dim > &shape)
std::size_t ConvertShapeToLength(const std::vector< size_t > &shape)
std::string ConvertValuesToString(size_t n, const T *data, size_t maxprint=-1)
std::vector< size_t > ConvertShapeToInt(const std::vector< Dim > &shape)
Convert shape based on Dim to integer format.
std::string ConvertTypeToString(ETensorType type)
void EmitNestedLoops(std::stringstream &out, size_t loopRank, const std::vector< Dim > shape)
std::string ConvertShapeToString(const std::vector< size_t > &shape)
void CloseNestedLoops(std::stringstream &out, size_t loopRank)
create variable transformations